batch_jaro_winkler 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/batch_jaro_winkler/batch_jaro_winkler.c +104 -0
- data/ext/batch_jaro_winkler/ext/LICENSE.uthash.txt +20 -0
- data/ext/batch_jaro_winkler/ext/batch_jaro_winkler.c +890 -0
- data/ext/batch_jaro_winkler/ext/batch_jaro_winkler.h +50 -0
- data/ext/batch_jaro_winkler/ext/batch_jaro_winkler_internal.h +98 -0
- data/ext/batch_jaro_winkler/ext/batch_jaro_winkler_runtime.h +578 -0
- data/ext/batch_jaro_winkler/ext/uthash.h +1230 -0
- data/ext/batch_jaro_winkler/extconf.rb +5 -0
- data/lib/batch_jaro_winkler.rb +242 -0
- data/lib/batch_jaro_winkler/version.rb +3 -0
- metadata +77 -0
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA256:
         | 
| 3 | 
            +
              metadata.gz: fa81343451beff7427878758a54f8e1af1b8cc2ee1905ab437bad420af0450c7
         | 
| 4 | 
            +
              data.tar.gz: a13aac63be06874621637a2f94c7256c4e5a5ffbd6f12316cc226aa05bad87f1
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: c0fa50b077e593d2d69e89d5695c337f2bcbe755e390519af23d67e4d7893222f7576703ba0e1b5a09e2344d5f925762d5492bfe49a9925992e1ff588b841b27
         | 
| 7 | 
            +
              data.tar.gz: 1c0e9765a11ded4a92d0fb9cf675674b73e145c04bf41aab438510159879f2ccce56da064f6dd4170ed0da0c9b80171dd88020f48ce4cdb0d1cae94014b31788
         | 
| @@ -0,0 +1,104 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
             | 
| 3 | 
            +
            /*
         | 
| 4 | 
            +
             To use this file with Ruby implementations other than MRI, replace its entire contents with:
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            #include "ext/batch_jaro_winkler.c"
         | 
| 7 | 
            +
            void	Init_batch_jaro_winkler(void){}
         | 
| 8 | 
            +
            */
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            #include "ext/batch_jaro_winkler.c"
         | 
| 11 | 
            +
            #include "ruby.h"
         | 
| 12 | 
            +
            #include "ruby/encoding.h"
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            /*
             * Appends one [candidate, score] pair per runtime result to rb_results.
             *
             * self            Receiver (unused).
             * tmp_store       Ruby array; intermediate Ruby objects are pushed into it so
             *                 that they stay reachable by the GC while this function runs.
             * rb_results      Ruby array receiving the [String, Float] pairs.
             * rb_c_results    Integer holding the address of a bjw_result array.
             * rb_nb_results   Integer, number of entries in that array.
             * rb_inp_encoded  Truthy: each candidate is returned as a raw byte string of
             *                 candidate_length * char_width bytes.
             *                 Falsy: the candidate bytes are treated as UTF-32LE, transcoded
             *                 to UTF-8 in one pass, then split back into one UTF-8 string
             *                 per candidate.
             * rb_char_width   Integer, number of bytes per candidate character.
             *
             * Returns Qtrue on success. Returns Qfalse when the staging buffer cannot be
             * allocated, or when the transcoded text is shorter than the candidate lengths
             * imply (in that last case rb_results may already hold some pairs).
             */
            VALUE	rb_bjw_build_runtime_result(VALUE self, VALUE tmp_store, VALUE rb_results, VALUE rb_c_results, VALUE rb_nb_results, VALUE rb_inp_encoded, VALUE rb_char_width)
            {
                uint32_t        nb_results = (uint32_t)(NUM2ULL(rb_nb_results));
                bjw_result      *results = (bjw_result*)(NUM2ULL(rb_c_results));
                uint32_t        char_width = (uint32_t)(NUM2ULL(rb_char_width));
                int             inp_encoded = RTEST(rb_inp_encoded);
                rb_encoding     *utf32le_encoding = rb_enc_find("UTF-32LE");
                rb_encoding     *utf8_encoding = rb_enc_find("UTF-8");
                VALUE           rb_utf8_encoding = rb_enc_from_encoding(utf8_encoding);
                VALUE           rb_all_candidates;
                VALUE           tmp_candidate;
                char            *all_candidates = NULL;
                uint64_t        utf8_nb_bytes = 0;
                uint64_t        total_nb_bytes;
                uint64_t        decal;
                uint64_t        bytes_len;
                uint64_t        candidate_length_in_bytes;
                uint64_t        i_char;
                uint32_t        i_result;
                unsigned char   lead;

                (void)self;
                // We use tmp_store so that local ruby objects are marked by the GC
                rb_ary_push(tmp_store, rb_utf8_encoding);

                if (!inp_encoded)
                {
                    // Gather every candidate into one contiguous buffer so that a single
                    // transcoding call handles all of them.
                    total_nb_bytes = 0;
                    for (i_result = 0; i_result < nb_results; i_result++)
                        total_nb_bytes += results[i_result].candidate_length;
                    total_nb_bytes *= char_width;
                    // malloc(0) is allowed to return NULL; ask for at least one byte so an
                    // empty result set is not mistaken for an allocation failure.
                    all_candidates = malloc(total_nb_bytes ? total_nb_bytes : 1);
                    if (!all_candidates)
                        return (Qfalse);
                    decal = 0;
                    for (i_result = 0; i_result < nb_results; i_result++)
                    {
                        // Widen before multiplying so the byte count cannot wrap in 32 bits.
                        bytes_len = (uint64_t)results[i_result].candidate_length * char_width;
                        // Skip empty candidates: memcpy must not be handed a possibly NULL source.
                        if (bytes_len)
                            memcpy(all_candidates + decal, results[i_result].candidate, bytes_len);
                        decal += bytes_len;
                    }
                    // rb_enc_str_new copies the bytes, so the staging buffer can be released
                    // as soon as the Ruby string exists.
                    rb_all_candidates = rb_enc_str_new(all_candidates, total_nb_bytes, utf32le_encoding);
                    // We use tmp_store so that local ruby objects are marked by the GC
                    rb_ary_push(tmp_store, rb_all_candidates);
                    free(all_candidates);
                    rb_all_candidates = rb_str_encode(rb_all_candidates, rb_utf8_encoding, 0, Qnil);
                    // We use tmp_store so that local ruby objects are marked by the GC
                    rb_ary_push(tmp_store, rb_all_candidates);
                    all_candidates = RSTRING_PTR(rb_all_candidates);
                    utf8_nb_bytes = (uint64_t)RSTRING_LEN(rb_all_candidates);
                }

                decal = 0;
                for (i_result = 0; i_result < nb_results; i_result++)
                {
                    if (!inp_encoded)
                    {
                        // Walk candidate_length UTF-8 sequences, using each lead byte to
                        // find how many bytes the character occupies.
                        candidate_length_in_bytes = 0;
                        for (i_char = 0; i_char < results[i_result].candidate_length; i_char++)
                        {
                            // Never read past the end of the transcoded string.
                            if (decal + candidate_length_in_bytes >= utf8_nb_bytes)
                                return (Qfalse);
                            lead = (unsigned char)all_candidates[decal + candidate_length_in_bytes];
                            if ((lead & 0xf8) == 0xf0)
                                candidate_length_in_bytes += 4;
                            else if ((lead & 0xf0) == 0xe0)
                                candidate_length_in_bytes += 3;
                            else if ((lead & 0xe0) == 0xc0)
                                candidate_length_in_bytes += 2;
                            else
                                candidate_length_in_bytes += 1;
                        }
                        // The last sequence must also fit entirely inside the string.
                        if (decal + candidate_length_in_bytes > utf8_nb_bytes)
                            return (Qfalse);
                        tmp_candidate = rb_enc_str_new(all_candidates + decal, candidate_length_in_bytes, utf8_encoding);
                        decal += candidate_length_in_bytes;
                    }
                    else
                    {
                        bytes_len = (uint64_t)results[i_result].candidate_length * char_width;
                        tmp_candidate = rb_str_new(results[i_result].candidate, (long)bytes_len);
                    }
                    rb_ary_push(rb_results, rb_ary_new_from_args(2, tmp_candidate, rb_float_new(results[i_result].score)));
                }
                return (Qtrue);
            }
         | 
| 97 | 
            +
             | 
| 98 | 
            +
            void	Init_batch_jaro_winkler(void)
         | 
| 99 | 
            +
            {
         | 
| 100 | 
            +
            	VALUE	cBatchJaroWinkler;
         | 
| 101 | 
            +
             | 
| 102 | 
            +
            	cBatchJaroWinkler = rb_define_module("BatchJaroWinkler");
         | 
| 103 | 
            +
            	rb_define_singleton_method(cBatchJaroWinkler, "rb_bjw_build_runtime_result", rb_bjw_build_runtime_result, 6);
         | 
| 104 | 
            +
            }
         | 
| @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            Copyright (c) 2005-2018, Troy D. Hanson  http://troydhanson.github.com/uthash/
         | 
| 2 | 
            +
            All rights reserved.
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            Redistribution and use in source and binary forms, with or without
         | 
| 5 | 
            +
            modification, are permitted provided that the following conditions are met:
         | 
| 6 | 
            +
             | 
| 7 | 
            +
                * Redistributions of source code must retain the above copyright
         | 
| 8 | 
            +
                  notice, this list of conditions and the following disclaimer.
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
         | 
| 11 | 
            +
            IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
         | 
| 12 | 
            +
            TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
         | 
| 13 | 
            +
            PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
         | 
| 14 | 
            +
            OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
         | 
| 15 | 
            +
            EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
         | 
| 16 | 
            +
            PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
         | 
| 17 | 
            +
            PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
         | 
| 18 | 
            +
            LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
         | 
| 19 | 
            +
            NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
         | 
| 20 | 
            +
            SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
         | 
| @@ -0,0 +1,890 @@ | |
| 1 | 
            +
            /*
         | 
| 2 | 
            +
            MIT License
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            Copyright (c) 2020 Dominik Bousquet https://github.com/dbousque/batch_jaro_winkler
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            Permission is hereby granted, free of charge, to any person obtaining a copy
         | 
| 7 | 
            +
            of this software and associated documentation files (the "Software"), to deal
         | 
| 8 | 
            +
            in the Software without restriction, including without limitation the rights
         | 
| 9 | 
            +
            to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         | 
| 10 | 
            +
            copies of the Software, and to permit persons to whom the Software is
         | 
| 11 | 
            +
            furnished to do so, subject to the following conditions:
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            The above copyright notice and this permission notice shall be included in all
         | 
| 14 | 
            +
            copies or substantial portions of the Software.
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         | 
| 17 | 
            +
            IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         | 
| 18 | 
            +
            FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
         | 
| 19 | 
            +
            AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         | 
| 20 | 
            +
            LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         | 
| 21 | 
            +
            OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         | 
| 22 | 
            +
            SOFTWARE.
         | 
| 23 | 
            +
            */
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            #include "batch_jaro_winkler_internal.h"
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            #define BJW_CHAR_TYPE uint32_t
         | 
| 28 | 
            +
            #define BJW_CHAR_ACCESS_TYPE uint32_t
         | 
| 29 | 
            +
            #include "batch_jaro_winkler_runtime.h"
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            #undef BJW_CHAR_ACCESS_TYPE
         | 
| 32 | 
            +
            #define BJW_CHAR_ACCESS_TYPE uint16_t
         | 
| 33 | 
            +
            #include "batch_jaro_winkler_runtime.h"
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            #undef BJW_CHAR_ACCESS_TYPE
         | 
| 36 | 
            +
            #define BJW_CHAR_ACCESS_TYPE uint8_t
         | 
| 37 | 
            +
            #include "batch_jaro_winkler_runtime.h"
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            #undef BJW_CHAR_TYPE
         | 
| 40 | 
            +
            #undef BJW_CHAR_ACCESS_TYPE
         | 
| 41 | 
            +
            #define BJW_CHAR_TYPE uint16_t
         | 
| 42 | 
            +
            #define BJW_CHAR_ACCESS_TYPE uint32_t
         | 
| 43 | 
            +
            #include "batch_jaro_winkler_runtime.h"
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            #undef BJW_CHAR_ACCESS_TYPE
         | 
| 46 | 
            +
            #define BJW_CHAR_ACCESS_TYPE uint16_t
         | 
| 47 | 
            +
            #include "batch_jaro_winkler_runtime.h"
         | 
| 48 | 
            +
             | 
| 49 | 
            +
            #undef BJW_CHAR_ACCESS_TYPE
         | 
| 50 | 
            +
            #define BJW_CHAR_ACCESS_TYPE uint8_t
         | 
| 51 | 
            +
            #include "batch_jaro_winkler_runtime.h"
         | 
| 52 | 
            +
             | 
| 53 | 
            +
            #undef BJW_CHAR_TYPE
         | 
| 54 | 
            +
            #undef BJW_CHAR_ACCESS_TYPE
         | 
| 55 | 
            +
            #define BJW_CHAR_TYPE uint8_t
         | 
| 56 | 
            +
            #define BJW_CHAR_ACCESS_TYPE uint32_t
         | 
| 57 | 
            +
            #include "batch_jaro_winkler_runtime.h"
         | 
| 58 | 
            +
             | 
| 59 | 
            +
            #undef BJW_CHAR_ACCESS_TYPE
         | 
| 60 | 
            +
            #define BJW_CHAR_ACCESS_TYPE uint16_t
         | 
| 61 | 
            +
            #include "batch_jaro_winkler_runtime.h"
         | 
| 62 | 
            +
             | 
| 63 | 
            +
            #undef BJW_CHAR_ACCESS_TYPE
         | 
| 64 | 
            +
            #define BJW_CHAR_ACCESS_TYPE uint8_t
         | 
| 65 | 
            +
            #include "batch_jaro_winkler_runtime.h"
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            #undef BJW_CHAR_TYPE
         | 
| 68 | 
            +
            #undef BJW_CHAR_ACCESS_TYPE
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            /*
             * Returns character i of the candidate, widened to 32 bits.
             * The storage is read as uint32_t when char_width is 4, as uint16_t when it
             * is 2, and as uint8_t for any other width.
             */
            static inline uint32_t	sorted_candidate_char_at(t_sorted_candidate *sorted_candidate, uint32_t i)
            {
                switch (sorted_candidate->char_width)
                {
                    case 4:
                        return (((uint32_t*)sorted_candidate->candidate)[i]);
                    case 2:
                        return (((uint16_t*)sorted_candidate->candidate)[i]);
                    default:
                        return (((uint8_t*)sorted_candidate->candidate)[i]);
                }
            }
         | 
| 82 | 
            +
             | 
| 83 | 
            +
            static int		sort_by_length_and_alphabetical_order(const void *void_cand1, const void *void_cand2)
         | 
| 84 | 
            +
            {
         | 
| 85 | 
            +
            	t_sorted_candidate	*cand1;
         | 
| 86 | 
            +
            	t_sorted_candidate	*cand2;
         | 
| 87 | 
            +
            	uint32_t			i;
         | 
| 88 | 
            +
             | 
| 89 | 
            +
            	cand1 = (t_sorted_candidate*)void_cand1;
         | 
| 90 | 
            +
            	cand2 = (t_sorted_candidate*)void_cand2;
         | 
| 91 | 
            +
            	if (cand1->candidate_length < cand2->candidate_length)
         | 
| 92 | 
            +
            		return (-1);
         | 
| 93 | 
            +
            	if (cand1->candidate_length > cand2->candidate_length)
         | 
| 94 | 
            +
            		return (1);
         | 
| 95 | 
            +
            	for (i = 0; i < cand1->candidate_length && i < cand2->candidate_length && sorted_candidate_char_at(cand1, i) == sorted_candidate_char_at(cand2, i); i++){}
         | 
| 96 | 
            +
            	return (
         | 
| 97 | 
            +
            		i >= cand1->candidate_length && i >= cand2->candidate_length ? 0 :
         | 
| 98 | 
            +
            		i >= cand1->candidate_length ? -1 :
         | 
| 99 | 
            +
            		i >= cand2->candidate_length ? 1 :
         | 
| 100 | 
            +
            		sorted_candidate_char_at(cand1, i) < sorted_candidate_char_at(cand2, i) ? -1 :
         | 
| 101 | 
            +
            		1
         | 
| 102 | 
            +
            	);
         | 
| 103 | 
            +
            }
         | 
| 104 | 
            +
             | 
| 105 | 
            +
            /*
             * Releases a whole character-occurrence table: every per-character entry,
             * the nested per-candidate table it owns, and each occ_indexes buffer.
             * Passing an empty (NULL) table is a no-op.
             */
            static void		free_char_occurrences(t_char_occurrences *char_occurrences)
            {
                t_char_occurrences			*char_entry;
                t_char_occurrences			*next_char_entry;
                t_tmp_candidate_occurrences	*per_candidate_table;
                t_tmp_candidate_occurrences	*candidate_entry;
                t_tmp_candidate_occurrences	*next_candidate_entry;

                HASH_ITER(hh, char_occurrences, char_entry, next_char_entry)
                {
                    // Unlink the entry first, then tear down what it owns.
                    HASH_DEL(char_occurrences, char_entry);
                    per_candidate_table = char_entry->candidates_occurrences;
                    HASH_ITER(hh, per_candidate_table, candidate_entry, next_candidate_entry)
                    {
                        HASH_DEL(per_candidate_table, candidate_entry);
                        free(candidate_entry->occ_indexes);
                        free(candidate_entry);
                    }
                    free(char_entry);
                }
            }
         | 
| 126 | 
            +
             | 
| 127 | 
            +
            /*
             * Error-path cleanup for build_exportable_model_for_thread: frees the
             * character-occurrence table and the sorted candidate array, then returns
             * NULL so the caller can write `return (exit_...(...))` directly.
             */
            static void		*exit_build_exportable_model_for_thread_error(t_sorted_candidate *sorted_candidates, t_char_occurrences *char_occurrences)
            {
                free_char_occurrences(char_occurrences);
                free(sorted_candidates);
                return (NULL);
            }
         | 
| 133 | 
            +
             | 
| 134 | 
            +
            static uint8_t	*build_exportable_model_for_thread(
         | 
| 135 | 
            +
            	void **original_candidates, uint32_t original_char_width, void **compressed_candidates, uint32_t compressed_char_width,
         | 
| 136 | 
            +
            	uint32_t char_access_width, uint32_t *candidates_lengths, uint32_t nb_candidates, float *min_scores, uint32_t *res_model_size
         | 
| 137 | 
            +
            )
         | 
| 138 | 
            +
            {
         | 
| 139 | 
            +
            	uint32_t					i_candidate;
         | 
| 140 | 
            +
            	uint32_t					i_char;
         | 
| 141 | 
            +
            	uint32_t					i_occurrence;
         | 
| 142 | 
            +
            	uint32_t					i_candidate_occurrrence;
         | 
| 143 | 
            +
            	// important to set to NULL for uthash
         | 
| 144 | 
            +
            	t_char_occurrences			*char_occurrences = NULL;
         | 
| 145 | 
            +
            	t_char_occurrences			*char_occurrence;
         | 
| 146 | 
            +
            	t_tmp_candidate_occurrences	*candidate_occurrences;
         | 
| 147 | 
            +
            	uint32_t					key;
         | 
| 148 | 
            +
            	uint32_t					total_candidates_lengths;
         | 
| 149 | 
            +
            	uint32_t					nb_char_matches;
         | 
| 150 | 
            +
            	uint32_t					nb_candidate_occurrences;
         | 
| 151 | 
            +
            	uint8_t						*model;
         | 
| 152 | 
            +
            	t_sorted_candidate			*sorted_candidates;
         | 
| 153 | 
            +
            	uint32_t					store_original_candidates;
         | 
| 154 | 
            +
             | 
| 155 | 
            +
            	store_original_candidates = original_candidates != compressed_candidates ? 1 : 0;
         | 
| 156 | 
            +
            	sorted_candidates = malloc(sizeof(t_sorted_candidate) * nb_candidates);
         | 
| 157 | 
            +
            	if (!sorted_candidates)
         | 
| 158 | 
            +
            		return (NULL);
         | 
| 159 | 
            +
            	for (i_candidate = 0; i_candidate < nb_candidates; i_candidate++)
         | 
| 160 | 
            +
            	{
         | 
| 161 | 
            +
            		sorted_candidates[i_candidate] = (t_sorted_candidate){
         | 
| 162 | 
            +
            			.original_ind = i_candidate,
         | 
| 163 | 
            +
            			.candidate = compressed_candidates[i_candidate],
         | 
| 164 | 
            +
            			.char_width = compressed_char_width,
         | 
| 165 | 
            +
            			.min_score = min_scores ? min_scores[i_candidate] : -1.0f,
         | 
| 166 | 
            +
            			.candidate_length = candidates_lengths[i_candidate]
         | 
| 167 | 
            +
            		};
         | 
| 168 | 
            +
            	}
         | 
| 169 | 
            +
             | 
| 170 | 
            +
            	// we sort to improve the runtime memory access pattern
         | 
| 171 | 
            +
            	qsort(sorted_candidates, nb_candidates, sizeof(t_sorted_candidate), &sort_by_length_and_alphabetical_order);
         | 
| 172 | 
            +
             | 
| 173 | 
            +
            	nb_char_matches = 0;
         | 
| 174 | 
            +
            	nb_candidate_occurrences = 0;
         | 
| 175 | 
            +
            	total_candidates_lengths = 0;
         | 
| 176 | 
            +
            	for (i_candidate = 0; i_candidate < nb_candidates; i_candidate++)
         | 
| 177 | 
            +
            	{
         | 
| 178 | 
            +
            		for (i_char = 0; i_char < sorted_candidates[i_candidate].candidate_length; i_char++)
         | 
| 179 | 
            +
            		{
         | 
| 180 | 
            +
            			// Find character matches
         | 
| 181 | 
            +
            			if (compressed_char_width == 4)
         | 
| 182 | 
            +
            				key = (uint32_t)(((uint32_t*)sorted_candidates[i_candidate].candidate)[i_char]);
         | 
| 183 | 
            +
            			else if (compressed_char_width == 2)
         | 
| 184 | 
            +
            				key = (uint32_t)(((uint16_t*)sorted_candidates[i_candidate].candidate)[i_char]);
         | 
| 185 | 
            +
            			else
         | 
| 186 | 
            +
            				key = (uint32_t)(((uint8_t*)sorted_candidates[i_candidate].candidate)[i_char]);
         | 
| 187 | 
            +
            			char_occurrence = NULL;
         | 
| 188 | 
            +
            			HASH_FIND(hh, char_occurrences, &key, sizeof(uint32_t), char_occurrence);
         | 
| 189 | 
            +
            			if (!char_occurrence) {
         | 
| 190 | 
            +
            				nb_char_matches++;
         | 
| 191 | 
            +
            				if (!(char_occurrence = malloc(sizeof(t_char_occurrences))))
         | 
| 192 | 
            +
            					return (exit_build_exportable_model_for_thread_error(sorted_candidates, char_occurrences));
         | 
| 193 | 
            +
            				char_occurrence->id = key;
         | 
| 194 | 
            +
            				// important to set to NULL for uthash
         | 
| 195 | 
            +
            				char_occurrence->candidates_occurrences = NULL;
         | 
| 196 | 
            +
            				if (store_original_candidates)
         | 
| 197 | 
            +
            				{
         | 
| 198 | 
            +
            					if (original_char_width == 4)
         | 
| 199 | 
            +
            						char_occurrence->original_representation = (uint32_t)(((uint32_t*)original_candidates[sorted_candidates[i_candidate].original_ind])[i_char]);
         | 
| 200 | 
            +
            					else if (original_char_width == 2)
         | 
| 201 | 
            +
            						char_occurrence->original_representation = (uint32_t)(((uint16_t*)original_candidates[sorted_candidates[i_candidate].original_ind])[i_char]);
         | 
| 202 | 
            +
            					else
         | 
| 203 | 
            +
            						char_occurrence->original_representation = (uint32_t)(((uint8_t*)original_candidates[sorted_candidates[i_candidate].original_ind])[i_char]);
         | 
| 204 | 
            +
            				}
         | 
| 205 | 
            +
            				HASH_ADD(hh, char_occurrences, id, sizeof(uint32_t), char_occurrence);
         | 
| 206 | 
            +
            			}
         | 
| 207 | 
            +
             | 
| 208 | 
            +
            			// Find character occurrences for this candidate
         | 
| 209 | 
            +
            			key = i_candidate;
         | 
| 210 | 
            +
            			candidate_occurrences = NULL;
         | 
| 211 | 
            +
            			HASH_FIND(hh, char_occurrence->candidates_occurrences, &key, sizeof(uint32_t), candidate_occurrences);
         | 
| 212 | 
            +
            			if (!candidate_occurrences)
         | 
| 213 | 
            +
            			{
         | 
| 214 | 
            +
            				nb_candidate_occurrences++;
         | 
| 215 | 
            +
            				if (!(candidate_occurrences = malloc(sizeof(t_tmp_candidate_occurrences))))
         | 
| 216 | 
            +
            					return (exit_build_exportable_model_for_thread_error(sorted_candidates, char_occurrences));
         | 
| 217 | 
            +
            				candidate_occurrences->id = key;
         | 
| 218 | 
            +
            				candidate_occurrences->occ_indexes_len = 0;
         | 
| 219 | 
            +
            				candidate_occurrences->occ_indexes_size = 32;
         | 
| 220 | 
            +
            				candidate_occurrences->occ_indexes = malloc(char_access_width * candidate_occurrences->occ_indexes_size);
         | 
| 221 | 
            +
            				if (!candidate_occurrences->occ_indexes)
         | 
| 222 | 
            +
            				{
         | 
| 223 | 
            +
            					free(candidate_occurrences);
         | 
| 224 | 
            +
            					return (exit_build_exportable_model_for_thread_error(sorted_candidates, char_occurrences));
         | 
| 225 | 
            +
            				}
         | 
| 226 | 
            +
            				HASH_ADD(hh, char_occurrence->candidates_occurrences, id, sizeof(uint32_t), candidate_occurrences);
         | 
| 227 | 
            +
            			}
         | 
| 228 | 
            +
             | 
| 229 | 
            +
            			// Not big enough, increase size
         | 
| 230 | 
            +
            			if (candidate_occurrences->occ_indexes_len == candidate_occurrences->occ_indexes_size)
         | 
| 231 | 
            +
            			{
         | 
| 232 | 
            +
            				void *new_occ_indexes = malloc(char_access_width * candidate_occurrences->occ_indexes_size * 2);
         | 
| 233 | 
            +
            				if (!new_occ_indexes)
         | 
| 234 | 
            +
            					return (exit_build_exportable_model_for_thread_error(sorted_candidates, char_occurrences));
         | 
| 235 | 
            +
            				memcpy(new_occ_indexes, candidate_occurrences->occ_indexes, char_access_width * candidate_occurrences->occ_indexes_size);
         | 
| 236 | 
            +
            				candidate_occurrences->occ_indexes_size *= 2;
         | 
| 237 | 
            +
            				free(candidate_occurrences->occ_indexes);
         | 
| 238 | 
            +
            				candidate_occurrences->occ_indexes = new_occ_indexes;
         | 
| 239 | 
            +
            			}
         | 
| 240 | 
            +
             | 
| 241 | 
            +
            			if (char_access_width == 4)
         | 
| 242 | 
            +
            				((uint32_t*)candidate_occurrences->occ_indexes)[candidate_occurrences->occ_indexes_len] = i_char;
         | 
| 243 | 
            +
            			if (char_access_width == 2)
         | 
| 244 | 
            +
            				((uint16_t*)candidate_occurrences->occ_indexes)[candidate_occurrences->occ_indexes_len] = i_char;
         | 
| 245 | 
            +
            			else
         | 
| 246 | 
            +
            				((uint8_t*)candidate_occurrences->occ_indexes)[candidate_occurrences->occ_indexes_len] = i_char;
         | 
| 247 | 
            +
            			candidate_occurrences->occ_indexes_len++;
         | 
| 248 | 
            +
            		}
         | 
| 249 | 
            +
            		total_candidates_lengths += sorted_candidates[i_candidate].candidate_length;
         | 
| 250 | 
            +
            	}
         | 
| 251 | 
            +
             | 
| 252 | 
            +
            	// candidate_ind + nb_occurrences
         | 
| 253 | 
            +
            	uint32_t metadata_size = (sizeof(uint32_t) + char_access_width) * nb_candidate_occurrences;
         | 
| 254 | 
            +
            	uint32_t indexes_size = char_access_width * total_candidates_lengths;
         | 
| 255 | 
            +
            	uint32_t occurrences_size = metadata_size + indexes_size;
         | 
| 256 | 
            +
             | 
| 257 | 
            +
            	uint32_t total_size =
         | 
| 258 | 
            +
            		sizeof(uint32_t) +																// nb_candidates
         | 
| 259 | 
            +
            		sizeof(uint32_t) +																// total_candidates_lengths
         | 
| 260 | 
            +
            		sizeof(uint32_t) +																// min_scores present or not (uint32_t used to keep 4 bytes alignment)
         | 
| 261 | 
            +
            		sizeof(uint32_t) +																// nb_char_matches
         | 
| 262 | 
            +
            		sizeof(uint32_t) +																// nb_candidate_occurrences
         | 
| 263 | 
            +
            		sizeof(uint32_t) +																// store_original_candidates
         | 
| 264 | 
            +
            		sizeof(uint32_t) * nb_candidates * (min_scores ? 1 : 0) +						// min_scores - can go from 0.0 to 1.0 -> convert to uint32_t for cross-platform support
         | 
| 265 | 
            +
            		sizeof(uint32_t) * nb_char_matches +											// chars_occurrences_decals
         | 
| 266 | 
            +
            		sizeof(uint32_t) * nb_char_matches +											// nb_candidates_per_char_match
         | 
| 267 | 
            +
            		sizeof(uint32_t) * (nb_candidates + 1) +										// candidates_decal
         | 
| 268 | 
            +
            		original_char_width * total_candidates_lengths * store_original_candidates +	// original_candidates (if store_original_candidates)
         | 
| 269 | 
            +
            		compressed_char_width * total_candidates_lengths +								// candidates (compressed)
         | 
| 270 | 
            +
            		original_char_width * nb_char_matches * store_original_candidates +				// original_chars (if store_original_candidates)
         | 
| 271 | 
            +
            		compressed_char_width * nb_char_matches +										// chars
         | 
| 272 | 
            +
            		occurrences_size; 																// occurrences
         | 
| 273 | 
            +
             | 
| 274 | 
            +
            	if (!(model = malloc(total_size)))
         | 
| 275 | 
            +
            		return (exit_build_exportable_model_for_thread_error(sorted_candidates, char_occurrences));
         | 
| 276 | 
            +
            	uint8_t *res_buffer_head = model;
         | 
| 277 | 
            +
            	*((uint32_t*)res_buffer_head) = nb_candidates;
         | 
| 278 | 
            +
            	res_buffer_head += sizeof(uint32_t);
         | 
| 279 | 
            +
            	*((uint32_t*)res_buffer_head) = total_candidates_lengths;
         | 
| 280 | 
            +
            	res_buffer_head += sizeof(uint32_t);
         | 
| 281 | 
            +
            	*((uint32_t*)res_buffer_head) = min_scores ? 1 : 0;
         | 
| 282 | 
            +
            	res_buffer_head += sizeof(uint32_t);
         | 
| 283 | 
            +
            	*((uint32_t*)res_buffer_head) = nb_char_matches;
         | 
| 284 | 
            +
            	res_buffer_head += sizeof(uint32_t);
         | 
| 285 | 
            +
            	*((uint32_t*)res_buffer_head) = nb_candidate_occurrences;
         | 
| 286 | 
            +
            	res_buffer_head += sizeof(uint32_t);
         | 
| 287 | 
            +
            	*((uint32_t*)res_buffer_head) = store_original_candidates;
         | 
| 288 | 
            +
            	res_buffer_head += sizeof(uint32_t);
         | 
| 289 | 
            +
             | 
| 290 | 
            +
            	if (min_scores)
         | 
| 291 | 
            +
            	{
         | 
| 292 | 
            +
            		uint32_t *res_min_scores = (uint32_t*)res_buffer_head;
         | 
| 293 | 
            +
            		for (i_candidate = 0; i_candidate < nb_candidates; i_candidate++)
         | 
| 294 | 
            +
            		{
         | 
| 295 | 
            +
            			// To prevent rounding errors when min_score == 1.0f
         | 
| 296 | 
            +
            			if (sorted_candidates[i_candidate].min_score >= 1.0f)
         | 
| 297 | 
            +
            				res_min_scores[i_candidate] = UINT32_MAX;
         | 
| 298 | 
            +
            			else
         | 
| 299 | 
            +
            				res_min_scores[i_candidate] = (uint32_t)(sorted_candidates[i_candidate].min_score * UINT32_MAX);
         | 
| 300 | 
            +
            		}
         | 
| 301 | 
            +
            		res_buffer_head += sizeof(uint32_t) * nb_candidates;
         | 
| 302 | 
            +
            	}
         | 
| 303 | 
            +
             | 
| 304 | 
            +
            	uint32_t *chars_occurrences_decals = (uint32_t*)res_buffer_head;
         | 
| 305 | 
            +
            	res_buffer_head += sizeof(uint32_t) * nb_char_matches;
         | 
| 306 | 
            +
            	uint32_t *nb_candidates_per_char_match = (uint32_t*)res_buffer_head;
         | 
| 307 | 
            +
            	res_buffer_head += sizeof(uint32_t) * nb_char_matches;
         | 
| 308 | 
            +
             | 
| 309 | 
            +
            	uint32_t *candidates_decal = (uint32_t*)res_buffer_head;
         | 
| 310 | 
            +
            	uint32_t decal = 0;
         | 
| 311 | 
            +
            	for (i_candidate = 0; i_candidate < nb_candidates; i_candidate++)
         | 
| 312 | 
            +
            	{
         | 
| 313 | 
            +
            		candidates_decal[i_candidate] = decal;
         | 
| 314 | 
            +
            		decal += sorted_candidates[i_candidate].candidate_length;
         | 
| 315 | 
            +
            	}
         | 
| 316 | 
            +
            	candidates_decal[i_candidate] = decal;
         | 
| 317 | 
            +
            	res_buffer_head += sizeof(uint32_t) * (nb_candidates + 1);
         | 
| 318 | 
            +
             | 
| 319 | 
            +
            	void *res_original_candidates = res_buffer_head;
         | 
| 320 | 
            +
            	res_buffer_head += original_char_width * total_candidates_lengths * store_original_candidates;
         | 
| 321 | 
            +
            	void *res_compressed_candidates = res_buffer_head;
         | 
| 322 | 
            +
            	res_buffer_head += compressed_char_width * total_candidates_lengths;
         | 
| 323 | 
            +
            	uint32_t candidates_char_decal = 0;
         | 
| 324 | 
            +
            	for (i_candidate = 0; i_candidate < nb_candidates; i_candidate++)
         | 
| 325 | 
            +
            	{
         | 
| 326 | 
            +
            		if (store_original_candidates)
         | 
| 327 | 
            +
            		{
         | 
| 328 | 
            +
            			memcpy(
         | 
| 329 | 
            +
            				res_original_candidates + (candidates_char_decal * original_char_width),
         | 
| 330 | 
            +
            				original_candidates[sorted_candidates[i_candidate].original_ind],
         | 
| 331 | 
            +
            				sorted_candidates[i_candidate].candidate_length * original_char_width
         | 
| 332 | 
            +
            			);
         | 
| 333 | 
            +
            		}
         | 
| 334 | 
            +
            		memcpy(
         | 
| 335 | 
            +
            			res_compressed_candidates + (candidates_char_decal * compressed_char_width),
         | 
| 336 | 
            +
            			sorted_candidates[i_candidate].candidate,
         | 
| 337 | 
            +
            			sorted_candidates[i_candidate].candidate_length * compressed_char_width
         | 
| 338 | 
            +
            		);
         | 
| 339 | 
            +
            		candidates_char_decal += sorted_candidates[i_candidate].candidate_length;
         | 
| 340 | 
            +
            	}
         | 
| 341 | 
            +
             | 
| 342 | 
            +
            	void *original_chars = res_buffer_head;
         | 
| 343 | 
            +
            	res_buffer_head += original_char_width * nb_char_matches * store_original_candidates;
         | 
| 344 | 
            +
            	void *chars = res_buffer_head;
         | 
| 345 | 
            +
            	res_buffer_head += compressed_char_width * nb_char_matches;
         | 
| 346 | 
            +
             | 
| 347 | 
            +
            	uint8_t *occurrences = (uint8_t*)res_buffer_head;
         | 
| 348 | 
            +
            	uint8_t *occurrences_head = occurrences;
         | 
| 349 | 
            +
             | 
| 350 | 
            +
            	i_char = 0;
         | 
| 351 | 
            +
            	for (char_occurrence = char_occurrences; char_occurrence; char_occurrence = char_occurrence->hh.next)
         | 
| 352 | 
            +
            	{
         | 
| 353 | 
            +
            		if (store_original_candidates)
         | 
| 354 | 
            +
            		{
         | 
| 355 | 
            +
            			if (original_char_width == 4)
         | 
| 356 | 
            +
            				((uint32_t*)original_chars)[i_char] = char_occurrence->original_representation;
         | 
| 357 | 
            +
            			else if (original_char_width == 2)
         | 
| 358 | 
            +
            				((uint16_t*)original_chars)[i_char] = char_occurrence->original_representation;
         | 
| 359 | 
            +
            			else
         | 
| 360 | 
            +
            				((uint8_t*)original_chars)[i_char] = char_occurrence->original_representation;
         | 
| 361 | 
            +
            		}
         | 
| 362 | 
            +
            		if (compressed_char_width == 4)
         | 
| 363 | 
            +
            			((uint32_t*)chars)[i_char] = (uint32_t)char_occurrence->id;
         | 
| 364 | 
            +
            		else if (compressed_char_width == 2)
         | 
| 365 | 
            +
            			((uint16_t*)chars)[i_char] = (uint16_t)char_occurrence->id;
         | 
| 366 | 
            +
            		else
         | 
| 367 | 
            +
            			((uint8_t*)chars)[i_char] = (uint8_t)char_occurrence->id;
         | 
| 368 | 
            +
            		chars_occurrences_decals[i_char] = occurrences_head - occurrences;
         | 
| 369 | 
            +
             | 
| 370 | 
            +
            		i_candidate_occurrrence = 0;
         | 
| 371 | 
            +
            		for (candidate_occurrences = char_occurrence->candidates_occurrences; candidate_occurrences; candidate_occurrences = candidate_occurrences->hh.next)
         | 
| 372 | 
            +
            		{
         | 
| 373 | 
            +
            			// 1 uint32_t for the candidate's index
         | 
| 374 | 
            +
            			// + 1 BJW_CHAR_ACCESS_TYPE for the number of occurrences
         | 
| 375 | 
            +
            			// + N BJW_CHAR_ACCESS_TYPE for the occurrences indexes
         | 
| 376 | 
            +
            			*((uint32_t*)occurrences_head) = (uint32_t)candidate_occurrences->id;
         | 
| 377 | 
            +
            			occurrences_head += sizeof(uint32_t);
         | 
| 378 | 
            +
            			if (char_access_width == 4)
         | 
| 379 | 
            +
            				*((uint32_t*)occurrences_head) = candidate_occurrences->occ_indexes_len;
         | 
| 380 | 
            +
            			else if (char_access_width == 2)
         | 
| 381 | 
            +
            				*((uint16_t*)occurrences_head) = candidate_occurrences->occ_indexes_len;
         | 
| 382 | 
            +
            			else
         | 
| 383 | 
            +
            				*((uint8_t*)occurrences_head) = candidate_occurrences->occ_indexes_len;
         | 
| 384 | 
            +
            			occurrences_head += char_access_width;
         | 
| 385 | 
            +
            			for (i_occurrence = 0; i_occurrence < candidate_occurrences->occ_indexes_len; i_occurrence++)
         | 
| 386 | 
            +
            			{
         | 
| 387 | 
            +
            				if (char_access_width == 4)
         | 
| 388 | 
            +
            					*((uint32_t*)occurrences_head) = ((uint32_t*)candidate_occurrences->occ_indexes)[i_occurrence];
         | 
| 389 | 
            +
            				if (char_access_width == 2)
         | 
| 390 | 
            +
            					*((uint16_t*)occurrences_head) = ((uint16_t*)candidate_occurrences->occ_indexes)[i_occurrence];
         | 
| 391 | 
            +
            				else
         | 
| 392 | 
            +
            					*((uint8_t*)occurrences_head) = ((uint8_t*)candidate_occurrences->occ_indexes)[i_occurrence];
         | 
| 393 | 
            +
            				occurrences_head += char_access_width;
         | 
| 394 | 
            +
            			}
         | 
| 395 | 
            +
             | 
| 396 | 
            +
            			i_candidate_occurrrence++;
         | 
| 397 | 
            +
            		}
         | 
| 398 | 
            +
             | 
| 399 | 
            +
            		nb_candidates_per_char_match[i_char] = i_candidate_occurrrence;
         | 
| 400 | 
            +
            		i_char++;
         | 
| 401 | 
            +
            	}
         | 
| 402 | 
            +
             | 
| 403 | 
            +
            	*res_model_size = total_size;
         | 
| 404 | 
            +
            	free_char_occurrences(char_occurrences);
         | 
| 405 | 
            +
            	free(sorted_candidates);
         | 
| 406 | 
            +
             | 
| 407 | 
            +
            	return (model);
         | 
| 408 | 
            +
            }
         | 
| 409 | 
            +
             | 
| 410 | 
            +
            // Pack the models of all runtime threads into a single exportable buffer.
            //
            // The candidates are split into nb_runtime_threads consecutive slices of
            // nb_candidates / nb_runtime_threads candidates (the last slice also takes the remainder),
            // one model is built per slice by build_exportable_model_for_thread(), and the models are
            // concatenated behind a header made of uint32_t values:
            //   [0]     nb_runtime_threads
            //   [1]     nb_candidates
            //   [2]     compressed_char_width
            //   [3]     char_access_width
            //   [4]     original_char_width
            //   [5 + i] padded size of the model built for thread i
            //
            // min_scores may be NULL. On success the malloc'ed buffer is returned and its size in bytes
            // is stored in *res_model_size. NULL is returned when nb_runtime_threads is 0 or when an
            // allocation fails; *res_model_size is meaningless in that case.
            static void		*build_exportable_model(
            	void **original_candidates, uint32_t original_char_width, void **compressed_candidates, uint32_t compressed_char_width,
            	uint32_t char_access_width, uint32_t *candidates_lengths, uint32_t nb_candidates, float *min_scores, uint32_t nb_runtime_threads, uint32_t *res_model_size
            )
            {
            	uint32_t			i_thread;
            	uint32_t			i;
            	uint8_t				*res_buffer;
            	uint8_t				*res_buffer_head;
            	void				**original_candidates_for_thread;
            	void				**compressed_candidates_for_thread;
            	uint32_t			*candidates_lengths_for_thread;
            	uint32_t			nb_candidates_for_thread;
            	float				*min_scores_for_thread;
            	uint32_t			nb_taken_candidates;
            	uint32_t			aligned_model_size;

            	nb_taken_candidates = 0;
            	*res_model_size = 0;
            	// The arrays below are variable length arrays: a length of zero is undefined behavior,
            	// and a model without any thread slice could not be used anyway.
            	if (nb_runtime_threads == 0)
            		return (NULL);

            	uint8_t				*model_per_thread[nb_runtime_threads];
            	uint32_t			model_size_per_thread[nb_runtime_threads];

            	for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
            	{
            		// The last thread takes whatever is left so that no candidate is dropped by the integer division.
            		nb_candidates_for_thread = i_thread == nb_runtime_threads - 1 ? nb_candidates - nb_taken_candidates : (nb_candidates / nb_runtime_threads);
            		original_candidates_for_thread = original_candidates + nb_taken_candidates;
            		compressed_candidates_for_thread = compressed_candidates + nb_taken_candidates;
            		candidates_lengths_for_thread = candidates_lengths + nb_taken_candidates;
            		min_scores_for_thread = min_scores ? min_scores + nb_taken_candidates : NULL;

            		model_per_thread[i_thread] = build_exportable_model_for_thread(
            			original_candidates_for_thread, original_char_width, compressed_candidates_for_thread, compressed_char_width,
            			char_access_width, candidates_lengths_for_thread, nb_candidates_for_thread, min_scores_for_thread, &(model_size_per_thread[i_thread])
            		);
            		if (!model_per_thread[i_thread])
            		{
            			// Only the models built so far have to be released.
            			for (i = 0; i < i_thread; i++)
            				free(model_per_thread[i]);
            			return (NULL);
            		}
            		// Align on the next 4 byte boundary. A size that is already a multiple of 4 still
            		// receives 4 padding bytes; this is kept as is so that the exported layout does not change.
            		*res_model_size += model_size_per_thread[i_thread] + (4 - (model_size_per_thread[i_thread] % 4));
            		nb_taken_candidates += nb_candidates_for_thread;
            	}

            	// we put the number of threads + nb candidates + char_width + char_access_width + original_char_width and the models per thread sizes at the start of the model
            	*res_model_size += sizeof(uint32_t) * (nb_runtime_threads + 5);
            	res_buffer = malloc(*res_model_size);
            	if (!res_buffer)
            	{
            		for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
            			free(model_per_thread[i_thread]);
            		return (NULL);
            	}
            	*((uint32_t*)(res_buffer + sizeof(uint32_t) * 0)) = nb_runtime_threads;
            	*((uint32_t*)(res_buffer + sizeof(uint32_t) * 1)) = nb_candidates;
            	*((uint32_t*)(res_buffer + sizeof(uint32_t) * 2)) = compressed_char_width;
            	*((uint32_t*)(res_buffer + sizeof(uint32_t) * 3)) = char_access_width;
            	*((uint32_t*)(res_buffer + sizeof(uint32_t) * 4)) = original_char_width;
            	res_buffer_head = res_buffer + sizeof(uint32_t) * (nb_runtime_threads + 5);
            	for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
            	{
            		// Same padding rule as the one used above to compute the total size.
            		aligned_model_size = model_size_per_thread[i_thread] + (4 - (model_size_per_thread[i_thread] % 4));
            		*((uint32_t*)(res_buffer + sizeof(uint32_t) * (i_thread + 5))) = aligned_model_size;
            		memcpy(res_buffer_head, model_per_thread[i_thread], model_size_per_thread[i_thread]);
            		// Zero the padding so that the exported buffer never contains uninitialized heap bytes.
            		memset(res_buffer_head + model_size_per_thread[i_thread], 0, aligned_model_size - model_size_per_thread[i_thread]);
            		res_buffer_head += aligned_model_size;
            		free(model_per_thread[i_thread]);
            	}

            	return (res_buffer);
            }
         | 
| 483 | 
            +
             | 
| 484 | 
            +
            // Remove every entry from the uthash table `chars` and free it.
            static void		free_chars(t_char *chars)
            {
            	t_char	*entry;
            	t_char	*following;

            	entry = chars;
            	while (entry)
            	{
            		// Remember the successor first: the entry is gone once it has been freed.
            		following = entry->hh.next;
            		HASH_DEL(chars, entry);
            		free(entry);
            		entry = following;
            	}
            }
         | 
| 495 | 
            +
             | 
| 496 | 
            +
            // Exported wrapper around free(), used by the ruby library
            // (presumably to release buffers handed out by this library - confirm against the ruby caller).
            // Like free(), it accepts NULL.
            void	_bjw_free(void *ptr)
            {
            	free(ptr);
            }
         | 
| 501 | 
            +
             | 
| 502 | 
            +
            // Pack all result data in single buffer.
         | 
| 503 | 
            +
            void	*bjw_build_exportable_model(void **candidates, uint32_t char_width, uint32_t *candidates_lengths, uint32_t nb_candidates, float *min_scores, uint32_t nb_runtime_threads, uint32_t *res_model_size)
         | 
| 504 | 
            +
            {
         | 
| 505 | 
            +
            	uint32_t	i_candidate;
         | 
| 506 | 
            +
            	uint32_t	i_char;
         | 
| 507 | 
            +
            	uint32_t	i;
         | 
| 508 | 
            +
            	uint32_t	longest_candidate;
         | 
| 509 | 
            +
            	// important to set to NULL for uthash
         | 
| 510 | 
            +
            	t_char		*chars = NULL;
         | 
| 511 | 
            +
            	t_char		*chr;
         | 
| 512 | 
            +
            	uint32_t	key;
         | 
| 513 | 
            +
            	uint32_t	nb_chars;
         | 
| 514 | 
            +
            	uint32_t	compressed_char_width;
         | 
| 515 | 
            +
            	uint32_t	char_access_width;
         | 
| 516 | 
            +
            	void		**compressed_candidates;
         | 
| 517 | 
            +
            	void		*exportable_model;
         | 
| 518 | 
            +
             | 
| 519 | 
            +
            	nb_chars = 0;
         | 
| 520 | 
            +
            	longest_candidate = 0;
         | 
| 521 | 
            +
            	for (i_candidate = 0; i_candidate < nb_candidates; i_candidate++)
         | 
| 522 | 
            +
            	{
         | 
| 523 | 
            +
            		if (candidates_lengths[i_candidate] > longest_candidate)
         | 
| 524 | 
            +
            			longest_candidate = candidates_lengths[i_candidate];
         | 
| 525 | 
            +
            		for (i_char = 0; i_char < candidates_lengths[i_candidate]; i_char++)
         | 
| 526 | 
            +
            		{
         | 
| 527 | 
            +
            			if (char_width == 4)
         | 
| 528 | 
            +
            				key = (uint32_t)(((uint32_t**)candidates)[i_candidate][i_char]);
         | 
| 529 | 
            +
            			else if (char_width == 2)
         | 
| 530 | 
            +
            				key = (uint32_t)(((uint16_t**)candidates)[i_candidate][i_char]);
         | 
| 531 | 
            +
            			else
         | 
| 532 | 
            +
            				key = (uint32_t)(((uint8_t**)candidates)[i_candidate][i_char]);
         | 
| 533 | 
            +
            			chr = NULL;
         | 
| 534 | 
            +
            			HASH_FIND(hh, chars, &key, sizeof(uint32_t), chr);
         | 
| 535 | 
            +
            			if (!chr) {
         | 
| 536 | 
            +
            				nb_chars++;
         | 
| 537 | 
            +
            				if (!(chr = malloc(sizeof(t_char))))
         | 
| 538 | 
            +
            				{
         | 
| 539 | 
            +
            					free_chars(chars);
         | 
| 540 | 
            +
            					return (NULL);
         | 
| 541 | 
            +
            				}
         | 
| 542 | 
            +
            				chr->id = key;
         | 
| 543 | 
            +
            				chr->new_representation = nb_chars;
         | 
| 544 | 
            +
            				HASH_ADD(hh, chars, id, sizeof(uint32_t), chr);
         | 
| 545 | 
            +
            			}
         | 
| 546 | 
            +
            		}
         | 
| 547 | 
            +
            	}
         | 
| 548 | 
            +
             | 
| 549 | 
            +
            	compressed_char_width = char_width;
         | 
| 550 | 
            +
            	// We keep one available char (0) to represent an unknown character in the input at runtime.
         | 
| 551 | 
            +
            	if (nb_chars < 256 - 1)
         | 
| 552 | 
            +
            		compressed_char_width = 1;
         | 
| 553 | 
            +
            	else if (nb_chars < 256 * 256 - 1)
         | 
| 554 | 
            +
            		compressed_char_width = 2;
         | 
| 555 | 
            +
            	char_access_width = 4;
         | 
| 556 | 
            +
            	// We can't go up to 256, since we need to be able to send inputs of arbitrary lengths at runtime
         | 
| 557 | 
            +
            	// and characters up to longest_candidate * 2 can be considered for the score.
         | 
| 558 | 
            +
            	if (longest_candidate < 128)
         | 
| 559 | 
            +
            		char_access_width = 1;
         | 
| 560 | 
            +
            	else if (longest_candidate < 256 * 128)
         | 
| 561 | 
            +
            		char_access_width = 2;
         | 
| 562 | 
            +
             | 
| 563 | 
            +
            	compressed_candidates = candidates;
         | 
| 564 | 
            +
             | 
| 565 | 
            +
            	// Rewrite candidates with smallest possible char_width
         | 
| 566 | 
            +
            	if (compressed_char_width < char_width)
         | 
| 567 | 
            +
            	{
         | 
| 568 | 
            +
            		if (!(compressed_candidates = malloc(sizeof(void*) * nb_candidates)))
         | 
| 569 | 
            +
            		{
         | 
| 570 | 
            +
            			free_chars(chars);
         | 
| 571 | 
            +
            			return (NULL);
         | 
| 572 | 
            +
            		}
         | 
| 573 | 
            +
            		for (i_candidate = 0; i_candidate < nb_candidates; i_candidate++)
         | 
| 574 | 
            +
            		{
         | 
| 575 | 
            +
            			if (!(compressed_candidates[i_candidate] = malloc(compressed_char_width * candidates_lengths[i_candidate])))
         | 
| 576 | 
            +
            			{
         | 
| 577 | 
            +
            				free_chars(chars);
         | 
| 578 | 
            +
            				for (i = 0; i < i_candidate; i++)
         | 
| 579 | 
            +
            					free(compressed_candidates[i]);
         | 
| 580 | 
            +
            				free(compressed_candidates);
         | 
| 581 | 
            +
            				return (NULL);
         | 
| 582 | 
            +
            			}
         | 
| 583 | 
            +
            			for (i_char = 0; i_char < candidates_lengths[i_candidate]; i_char++)
         | 
| 584 | 
            +
            			{
         | 
| 585 | 
            +
            				if (char_width == 4)
         | 
| 586 | 
            +
            					key = (uint32_t)(((uint32_t**)candidates)[i_candidate][i_char]);
         | 
| 587 | 
            +
            				else if (char_width == 2)
         | 
| 588 | 
            +
            					key = (uint32_t)(((uint16_t**)candidates)[i_candidate][i_char]);
         | 
| 589 | 
            +
            				else
         | 
| 590 | 
            +
            					key = (uint32_t)(((uint8_t**)candidates)[i_candidate][i_char]);
         | 
| 591 | 
            +
            				HASH_FIND(hh, chars, &key, sizeof(uint32_t), chr);
         | 
| 592 | 
            +
            				if (compressed_char_width == 4)
         | 
| 593 | 
            +
            					((uint32_t**)compressed_candidates)[i_candidate][i_char] = chr->new_representation;
         | 
| 594 | 
            +
            				else if (compressed_char_width == 2)
         | 
| 595 | 
            +
            					((uint16_t**)compressed_candidates)[i_candidate][i_char] = chr->new_representation;
         | 
| 596 | 
            +
            				else
         | 
| 597 | 
            +
            					((uint8_t**)compressed_candidates)[i_candidate][i_char] = chr->new_representation;
         | 
| 598 | 
            +
            			}
         | 
| 599 | 
            +
            		}
         | 
| 600 | 
            +
            	}
         | 
| 601 | 
            +
             | 
| 602 | 
            +
            	free_chars(chars);
         | 
| 603 | 
            +
             | 
| 604 | 
            +
            	exportable_model = build_exportable_model(
         | 
| 605 | 
            +
            		candidates, char_width, compressed_candidates, compressed_char_width, char_access_width,
         | 
| 606 | 
            +
            		candidates_lengths, nb_candidates, min_scores, nb_runtime_threads, res_model_size
         | 
| 607 | 
            +
            	);
         | 
| 608 | 
            +
             | 
| 609 | 
            +
            	if (compressed_candidates != candidates)
         | 
| 610 | 
            +
            	{
         | 
| 611 | 
            +
            		for (i_candidate = 0; i_candidate < nb_candidates; i_candidate++)
         | 
| 612 | 
            +
            			free(compressed_candidates[i_candidate]);
         | 
| 613 | 
            +
            		free(compressed_candidates);
         | 
| 614 | 
            +
            	}
         | 
| 615 | 
            +
             | 
| 616 | 
            +
            	return (exportable_model);
         | 
| 617 | 
            +
            }
         | 
| 618 | 
            +
             | 
| 619 | 
            +
            void	bjw_free_runtime_model(void *runtime_model)
         | 
| 620 | 
            +
            {
         | 
| 621 | 
            +
            	uint32_t	char_width;
         | 
| 622 | 
            +
            	uint32_t	char_access_width;
         | 
| 623 | 
            +
             | 
| 624 | 
            +
            	char_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 2));
         | 
| 625 | 
            +
            	char_access_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 3));
         | 
| 626 | 
            +
            	
         | 
| 627 | 
            +
            	void (*free_function)(void*) = NULL;
         | 
| 628 | 
            +
            	if (char_width == 4 && char_access_width == 4)
         | 
| 629 | 
            +
            		free_function = free_runtime_model_uint32_t_uint32_t;
         | 
| 630 | 
            +
            	else if (char_width == 4 && char_access_width == 2)
         | 
| 631 | 
            +
            		free_function = free_runtime_model_uint32_t_uint16_t;
         | 
| 632 | 
            +
            	else if (char_width == 4 && char_access_width == 1)
         | 
| 633 | 
            +
            		free_function = free_runtime_model_uint32_t_uint8_t;
         | 
| 634 | 
            +
            	else if (char_width == 2 && char_access_width == 4)
         | 
| 635 | 
            +
            		free_function = free_runtime_model_uint16_t_uint32_t;
         | 
| 636 | 
            +
            	else if (char_width == 2 && char_access_width == 2)
         | 
| 637 | 
            +
            		free_function = free_runtime_model_uint16_t_uint16_t;
         | 
| 638 | 
            +
            	else if (char_width == 2 && char_access_width == 1)
         | 
| 639 | 
            +
            		free_function = free_runtime_model_uint16_t_uint8_t;
         | 
| 640 | 
            +
            	else if (char_width == 1 && char_access_width == 4)
         | 
| 641 | 
            +
            		free_function = free_runtime_model_uint8_t_uint32_t;
         | 
| 642 | 
            +
            	else if (char_width == 1 && char_access_width == 2)
         | 
| 643 | 
            +
            		free_function = free_runtime_model_uint8_t_uint16_t;
         | 
| 644 | 
            +
            	else if (char_width == 1 && char_access_width == 1)
         | 
| 645 | 
            +
            		free_function = free_runtime_model_uint8_t_uint8_t;
         | 
| 646 | 
            +
             | 
| 647 | 
            +
            	free_function(runtime_model);
         | 
| 648 | 
            +
            }
         | 
| 649 | 
            +
             | 
| 650 | 
            +
            void	*bjw_build_runtime_model(void *exportable_model)
         | 
| 651 | 
            +
            {
         | 
| 652 | 
            +
            	uint32_t	nb_runtime_threads;
         | 
| 653 | 
            +
            	uint32_t	nb_candidates;
         | 
| 654 | 
            +
            	uint32_t	*model_size_per_thread;
         | 
| 655 | 
            +
            	uint32_t	char_width;
         | 
| 656 | 
            +
            	uint32_t	char_access_width;
         | 
| 657 | 
            +
            	uint32_t	original_char_width;
         | 
| 658 | 
            +
             | 
| 659 | 
            +
            	uint8_t *exportable_model_head = (uint8_t*)exportable_model;
         | 
| 660 | 
            +
            	nb_runtime_threads = *((uint32_t*)exportable_model_head);
         | 
| 661 | 
            +
            	exportable_model_head += sizeof(uint32_t);
         | 
| 662 | 
            +
            	nb_candidates = *((uint32_t*)exportable_model_head);
         | 
| 663 | 
            +
            	exportable_model_head += sizeof(uint32_t);
         | 
| 664 | 
            +
            	char_width = *((uint32_t*)exportable_model_head);
         | 
| 665 | 
            +
            	exportable_model_head += sizeof(uint32_t);
         | 
| 666 | 
            +
            	char_access_width = *((uint32_t*)exportable_model_head);
         | 
| 667 | 
            +
            	exportable_model_head += sizeof(uint32_t);
         | 
| 668 | 
            +
            	original_char_width = *((uint32_t*)exportable_model_head);
         | 
| 669 | 
            +
            	exportable_model_head += sizeof(uint32_t);
         | 
| 670 | 
            +
            	model_size_per_thread = (uint32_t*)exportable_model_head;
         | 
| 671 | 
            +
            	exportable_model_head += sizeof(uint32_t) * nb_runtime_threads;
         | 
| 672 | 
            +
             | 
| 673 | 
            +
            	void *(*build_function)(uint8_t*, uint32_t, uint32_t, uint32_t, uint32_t*) = NULL;
         | 
| 674 | 
            +
            	if (char_width == 4 && char_access_width == 4)
         | 
| 675 | 
            +
            		build_function = build_runtime_model_uint32_t_uint32_t;
         | 
| 676 | 
            +
            	else if (char_width == 4 && char_access_width == 2)
         | 
| 677 | 
            +
            		build_function = build_runtime_model_uint32_t_uint16_t;
         | 
| 678 | 
            +
            	else if (char_width == 4 && char_access_width == 1)
         | 
| 679 | 
            +
            		build_function = build_runtime_model_uint32_t_uint8_t;
         | 
| 680 | 
            +
            	else if (char_width == 2 && char_access_width == 4)
         | 
| 681 | 
            +
            		build_function = build_runtime_model_uint16_t_uint32_t;
         | 
| 682 | 
            +
            	else if (char_width == 2 && char_access_width == 2)
         | 
| 683 | 
            +
            		build_function = build_runtime_model_uint16_t_uint16_t;
         | 
| 684 | 
            +
            	else if (char_width == 2 && char_access_width == 1)
         | 
| 685 | 
            +
            		build_function = build_runtime_model_uint16_t_uint8_t;
         | 
| 686 | 
            +
            	else if (char_width == 1 && char_access_width == 4)
         | 
| 687 | 
            +
            		build_function = build_runtime_model_uint8_t_uint32_t;
         | 
| 688 | 
            +
            	else if (char_width == 1 && char_access_width == 2)
         | 
| 689 | 
            +
            		build_function = build_runtime_model_uint8_t_uint16_t;
         | 
| 690 | 
            +
            	else if (char_width == 1 && char_access_width == 1)
         | 
| 691 | 
            +
            		build_function = build_runtime_model_uint8_t_uint8_t;
         | 
| 692 | 
            +
             | 
| 693 | 
            +
            	return (build_function(exportable_model_head, nb_runtime_threads, nb_candidates, original_char_width, model_size_per_thread));
         | 
| 694 | 
            +
            }
         | 
| 695 | 
            +
             | 
| 696 | 
            +
            static int		sort_results_by_score(const void *void_res1, const void *void_res2)
         | 
| 697 | 
            +
            {
         | 
| 698 | 
            +
            	bjw_result	*res1;
         | 
| 699 | 
            +
            	bjw_result	*res2;
         | 
| 700 | 
            +
             | 
| 701 | 
            +
            	res1 = (bjw_result*)void_res1;
         | 
| 702 | 
            +
            	res2 = (bjw_result*)void_res2;
         | 
| 703 | 
            +
            	return (res1->score < res2->score ? 1 : res1->score == res2->score ? 0 : -1);
         | 
| 704 | 
            +
            }
         | 
| 705 | 
            +
             | 
| 706 | 
            +
            bjw_result	*bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t input_length, float min_score, float weight, float threshold, uint32_t n_best_results, uint32_t *nb_results)
         | 
| 707 | 
            +
            {
         | 
| 708 | 
            +
            	uint32_t		i_thread;
         | 
| 709 | 
            +
            	uint32_t		nb_runtime_threads;
         | 
| 710 | 
            +
            	uint32_t		nb_candidates;
         | 
| 711 | 
            +
            	uint32_t		char_width;
         | 
| 712 | 
            +
            	uint32_t		char_access_width;
         | 
| 713 | 
            +
            	uint32_t		original_char_width;
         | 
| 714 | 
            +
            	char			both_min_score_and_min_scores;
         | 
| 715 | 
            +
            	uint32_t		n_best_i_try;
         | 
| 716 | 
            +
            	uint32_t		n_best_nb_tries;
         | 
| 717 | 
            +
            	float			n_best_tries[3];
         | 
| 718 | 
            +
            	bjw_result		*results;
         | 
| 719 | 
            +
            	bjw_result		*tmp_results;
         | 
| 720 | 
            +
            	uint32_t		results_decal;
         | 
| 721 | 
            +
            	t_thread_data	*threads_data;
         | 
| 722 | 
            +
            #if BJW_USE_THREADS
         | 
| 723 | 
            +
            # ifdef _WIN32
         | 
| 724 | 
            +
            	HANDLE			*threads;
         | 
| 725 | 
            +
            # else
         | 
| 726 | 
            +
            	pthread_t		*threads;
         | 
| 727 | 
            +
            # endif
         | 
| 728 | 
            +
            #endif
         | 
| 729 | 
            +
             | 
| 730 | 
            +
            	nb_runtime_threads = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 0));
         | 
| 731 | 
            +
            	nb_candidates = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 1));
         | 
| 732 | 
            +
            	char_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 2));
         | 
| 733 | 
            +
            	char_access_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 3));
         | 
| 734 | 
            +
            	original_char_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 4));
         | 
| 735 | 
            +
             | 
| 736 | 
            +
            	// Characters after 256 won't be taken into consideration for score calculation anyway, and uint8_t won't be able to represent the indices.
         | 
| 737 | 
            +
            	if (char_access_width == 1 && input_length >= 256)
         | 
| 738 | 
            +
            		input_length = 256 - 1;
         | 
| 739 | 
            +
            	else if (char_access_width == 2 && input_length >= 256 * 256)
         | 
| 740 | 
            +
            		input_length = (256 * 256) - 1;
         | 
| 741 | 
            +
            	both_min_score_and_min_scores = min_score < 0.0f && n_best_results != 0;
         | 
| 742 | 
            +
            	if (n_best_results > nb_candidates)
         | 
| 743 | 
            +
            		n_best_results = nb_candidates;
         | 
| 744 | 
            +
             | 
| 745 | 
            +
            #if BJW_USE_THREADS
         | 
| 746 | 
            +
            # ifdef _WIN32
         | 
| 747 | 
            +
            	if (!(threads = malloc(sizeof(HANDLE) * nb_runtime_threads)))
         | 
| 748 | 
            +
            		return (NULL);
         | 
| 749 | 
            +
            # else
         | 
| 750 | 
            +
            	if (!(threads = malloc(sizeof(pthread_t) * nb_runtime_threads)))
         | 
| 751 | 
            +
            		return (NULL);
         | 
| 752 | 
            +
            # endif
         | 
| 753 | 
            +
            #endif
         | 
| 754 | 
            +
             | 
| 755 | 
            +
            	if (!(threads_data = malloc(sizeof(t_thread_data) * nb_runtime_threads)))
         | 
| 756 | 
            +
            		return (NULL);
         | 
| 757 | 
            +
             | 
| 758 | 
            +
            	void* (*runtime_function)(void*) = NULL;
         | 
| 759 | 
            +
            	if (char_width == 4 && char_access_width == 4)
         | 
| 760 | 
            +
            		runtime_function = jaro_winkler_distance_for_thread_uint32_t_uint32_t;
         | 
| 761 | 
            +
            	else if (char_width == 4 && char_access_width == 2)
         | 
| 762 | 
            +
            		runtime_function = jaro_winkler_distance_for_thread_uint32_t_uint16_t;
         | 
| 763 | 
            +
            	else if (char_width == 4 && char_access_width == 1)
         | 
| 764 | 
            +
            		runtime_function = jaro_winkler_distance_for_thread_uint32_t_uint8_t;
         | 
| 765 | 
            +
            	else if (char_width == 2 && char_access_width == 4)
         | 
| 766 | 
            +
            		runtime_function = jaro_winkler_distance_for_thread_uint16_t_uint32_t;
         | 
| 767 | 
            +
            	else if (char_width == 2 && char_access_width == 2)
         | 
| 768 | 
            +
            		runtime_function = jaro_winkler_distance_for_thread_uint16_t_uint16_t;
         | 
| 769 | 
            +
            	else if (char_width == 2 && char_access_width == 1)
         | 
| 770 | 
            +
            		runtime_function = jaro_winkler_distance_for_thread_uint16_t_uint8_t;
         | 
| 771 | 
            +
            	else if (char_width == 1 && char_access_width == 4)
         | 
| 772 | 
            +
            		runtime_function = jaro_winkler_distance_for_thread_uint8_t_uint32_t;
         | 
| 773 | 
            +
            	else if (char_width == 1 && char_access_width == 2)
         | 
| 774 | 
            +
            		runtime_function = jaro_winkler_distance_for_thread_uint8_t_uint16_t;
         | 
| 775 | 
            +
            	else if (char_width == 1 && char_access_width == 1)
         | 
| 776 | 
            +
            		runtime_function = jaro_winkler_distance_for_thread_uint8_t_uint8_t;
         | 
| 777 | 
            +
             | 
| 778 | 
            +
            	if (n_best_results != 0)
         | 
| 779 | 
            +
            	{
         | 
| 780 | 
            +
            		if (nb_candidates > 0)
         | 
| 781 | 
            +
            			n_best_tries[0] = 1.0f - (((float)n_best_results) / nb_candidates);
         | 
| 782 | 
            +
            		else
         | 
| 783 | 
            +
            			n_best_tries[0] = -1.0f;
         | 
| 784 | 
            +
            		if (n_best_tries[0] > 0.8f)
         | 
| 785 | 
            +
            			n_best_tries[0] = 0.8f;
         | 
| 786 | 
            +
            		n_best_tries[1] = n_best_tries[0] - 0.2f;
         | 
| 787 | 
            +
            		n_best_tries[1] = n_best_tries[1] < 0.0f ? -1.0f : n_best_tries[1];
         | 
| 788 | 
            +
            		n_best_tries[2] = min_score;
         | 
| 789 | 
            +
            		n_best_nb_tries = 3;
         | 
| 790 | 
            +
             | 
| 791 | 
            +
            		if (n_best_tries[1] <= min_score)
         | 
| 792 | 
            +
            		{
         | 
| 793 | 
            +
            			n_best_nb_tries--;
         | 
| 794 | 
            +
            			n_best_tries[1] = min_score;
         | 
| 795 | 
            +
            		}
         | 
| 796 | 
            +
            		if (n_best_tries[0] <= min_score)
         | 
| 797 | 
            +
            		{
         | 
| 798 | 
            +
            			n_best_nb_tries--;
         | 
| 799 | 
            +
            			n_best_tries[0] = min_score;
         | 
| 800 | 
            +
            		}
         | 
| 801 | 
            +
            	}
         | 
| 802 | 
            +
            	else
         | 
| 803 | 
            +
            	{
         | 
| 804 | 
            +
            		n_best_tries[0] = min_score;
         | 
| 805 | 
            +
            		n_best_nb_tries = 1;
         | 
| 806 | 
            +
            	}
         | 
| 807 | 
            +
             | 
| 808 | 
            +
            	for (n_best_i_try = 0; n_best_i_try < n_best_nb_tries; n_best_i_try++)
         | 
| 809 | 
            +
            	{
         | 
| 810 | 
            +
            		for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
         | 
| 811 | 
            +
            		{
         | 
| 812 | 
            +
            			threads_data[i_thread] = (t_thread_data){
         | 
| 813 | 
            +
            				.runtime_models = runtime_model + sizeof(uint32_t) * 5,
         | 
| 814 | 
            +
            				.i_thread = i_thread,
         | 
| 815 | 
            +
            				.original_char_width = original_char_width,
         | 
| 816 | 
            +
            				.input = input,
         | 
| 817 | 
            +
            				.input_length = input_length,
         | 
| 818 | 
            +
            				.min_score = n_best_tries[n_best_i_try],
         | 
| 819 | 
            +
            				.weight = weight,
         | 
| 820 | 
            +
            				.threshold = threshold,
         | 
| 821 | 
            +
            				.both_min_score_and_min_scores = both_min_score_and_min_scores,
         | 
| 822 | 
            +
            				.results = NULL,
         | 
| 823 | 
            +
            				.nb_results = 0
         | 
| 824 | 
            +
            			};
         | 
| 825 | 
            +
             | 
| 826 | 
            +
            #if BJW_USE_THREADS
         | 
| 827 | 
            +
            # ifdef _WIN32
         | 
| 828 | 
            +
            			threads[i_thread] = CreateThread(NULL, 0, runtime_function, &(threads_data[i_thread]), 0, NULL);
         | 
| 829 | 
            +
            # else
         | 
| 830 | 
            +
            			pthread_create(&(threads[i_thread]), NULL, runtime_function, &(threads_data[i_thread]));
         | 
| 831 | 
            +
            # endif
         | 
| 832 | 
            +
            #else
         | 
| 833 | 
            +
            			runtime_function(&(threads_data[i_thread]));
         | 
| 834 | 
            +
            #endif
         | 
| 835 | 
            +
            		}
         | 
| 836 | 
            +
             | 
| 837 | 
            +
            		*nb_results = 0;
         | 
| 838 | 
            +
            		for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
         | 
| 839 | 
            +
            		{
         | 
| 840 | 
            +
            #if BJW_USE_THREADS
         | 
| 841 | 
            +
            # ifdef _WIN32
         | 
| 842 | 
            +
            			WaitForSingleObject(threads[i_thread], INFINITE);
         | 
| 843 | 
            +
            			CloseHandle(threads[i_thread]);
         | 
| 844 | 
            +
            # else
         | 
| 845 | 
            +
            			pthread_join(threads[i_thread], NULL);
         | 
| 846 | 
            +
            # endif
         | 
| 847 | 
            +
            #endif
         | 
| 848 | 
            +
            			*nb_results += threads_data[i_thread].nb_results;
         | 
| 849 | 
            +
            		}
         | 
| 850 | 
            +
             | 
| 851 | 
            +
            		if (n_best_results == 0 || *nb_results >= n_best_results)
         | 
| 852 | 
            +
            			break ;
         | 
| 853 | 
            +
            	}
         | 
| 854 | 
            +
             | 
| 855 | 
            +
            	if (!(results = malloc(sizeof(bjw_result) * (*nb_results))))
         | 
| 856 | 
            +
            		return (NULL);
         | 
| 857 | 
            +
            	results_decal = 0;
         | 
| 858 | 
            +
            	for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
         | 
| 859 | 
            +
            	{
         | 
| 860 | 
            +
            		memcpy(&(results[results_decal]), threads_data[i_thread].results, sizeof(bjw_result) * threads_data[i_thread].nb_results);
         | 
| 861 | 
            +
            		free(threads_data[i_thread].results);
         | 
| 862 | 
            +
            		results_decal += threads_data[i_thread].nb_results;
         | 
| 863 | 
            +
            	}
         | 
| 864 | 
            +
             | 
| 865 | 
            +
            	if (n_best_results != 0)
         | 
| 866 | 
            +
            	{
         | 
| 867 | 
            +
            		qsort(results, *nb_results, sizeof(bjw_result), &sort_results_by_score);
         | 
| 868 | 
            +
            		if (*nb_results > n_best_results)
         | 
| 869 | 
            +
            		{
         | 
| 870 | 
            +
            			if (!(tmp_results = malloc(sizeof(bjw_result) * n_best_results)))
         | 
| 871 | 
            +
            				return (NULL);
         | 
| 872 | 
            +
            			memcpy(tmp_results, results, sizeof(bjw_result) * n_best_results);
         | 
| 873 | 
            +
            			free(results);
         | 
| 874 | 
            +
            			results = tmp_results;
         | 
| 875 | 
            +
            			*nb_results = n_best_results;
         | 
| 876 | 
            +
            		}
         | 
| 877 | 
            +
            	}
         | 
| 878 | 
            +
             | 
| 879 | 
            +
            #if BJW_USE_THREADS
         | 
| 880 | 
            +
            	free(threads);
         | 
| 881 | 
            +
            #endif
         | 
| 882 | 
            +
            	free(threads_data);
         | 
| 883 | 
            +
             | 
| 884 | 
            +
            	return (results);
         | 
| 885 | 
            +
            }
         | 
| 886 | 
            +
             | 
| 887 | 
            +
            /*
             * Convenience wrapper around bjw_jaro_winkler_distance() that passes -1.0f
             * for both weight and threshold; every other argument is forwarded as given
             * and the callee's return value is returned unchanged.
             * NOTE(review): the negative values presumably switch off the Winkler
             * adjustment in the scoring routine - confirm against the runtime code.
             */
            bjw_result	*bjw_jaro_distance(void *runtime_model, void *input, uint32_t input_length, float min_score, uint32_t n_best_results, uint32_t *nb_results)
            {
            	const float	jaro_only_weight = -1.0f;
            	const float	jaro_only_threshold = -1.0f;
            	bjw_result	*results;

            	results = bjw_jaro_winkler_distance(
            		runtime_model, input, input_length, min_score,
            		jaro_only_weight, jaro_only_threshold,
            		n_best_results, nb_results
            	);
            	return (results);
            }
         |