batch_jaro_winkler 0.1.1 → 0.1.3

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1f2ffa14f3a5763647191da3a1df8683a76d1a3919ee0b0dc3585a6827810801
4
- data.tar.gz: ba85b0a2c0e9f729642701c9f5062d99ea90fa5f49a12ba0887d536709859d45
3
+ metadata.gz: 432282dad48464ef1b3bbbbdf7f58d23482a4995dd19a71e76d77053dff0d075
4
+ data.tar.gz: 5062b7c8d7854fa7966fde862b827d95d9fe7f14633102c2d785dc09a9a24cd9
5
5
  SHA512:
6
- metadata.gz: 1334145d89c9a4f45900d728757fbc86ac5f6800dc2d09b7dc61d55a87fc6cc0fadb2490e7076e09dc22086ef09ead801f58965f6a46adc43714545d7040488b
7
- data.tar.gz: 140a21c3203d148bcb5aa6f444114088f0a72eb44a655932855565945aacbf0916ded0a761e6eaee8a387e1e32473490866664e9b36dc9ad8f2c4b850fa36e7f
6
+ metadata.gz: fc6a7605d46daa03aff1e33bde2d9047af693ad10d2618635eea4f3aff8515b00ab82d999f7b49e4b4754748a11b450c37683a4746123e6378e9b1ebde127692
7
+ data.tar.gz: 594362d2c8d4ec96f6fe8efaa879b5a38b3de430dfba5639f61b154034d004fadf501f108164daadc0822988426e8f395df106a9f825ac1dafc81637a48d774f
File without changes
File without changes
@@ -326,13 +326,13 @@ static uint8_t *build_exportable_model_for_thread(
326
326
  if (store_original_candidates)
327
327
  {
328
328
  memcpy(
329
- res_original_candidates + (candidates_char_decal * original_char_width),
329
+ (uint8_t*)res_original_candidates + (candidates_char_decal * original_char_width),
330
330
  original_candidates[sorted_candidates[i_candidate].original_ind],
331
331
  sorted_candidates[i_candidate].candidate_length * original_char_width
332
332
  );
333
333
  }
334
334
  memcpy(
335
- res_compressed_candidates + (candidates_char_decal * compressed_char_width),
335
+ (uint8_t*)res_compressed_candidates + (candidates_char_decal * compressed_char_width),
336
336
  sorted_candidates[i_candidate].candidate,
337
337
  sorted_candidates[i_candidate].candidate_length * compressed_char_width
338
338
  );
@@ -415,8 +415,8 @@ static void *build_exportable_model(
415
415
  {
416
416
  uint32_t i_thread;
417
417
  uint32_t i;
418
- uint8_t *model_per_thread[nb_runtime_threads];
419
- uint32_t model_size_per_thread[nb_runtime_threads];
418
+ uint8_t **model_per_thread;
419
+ uint32_t *model_size_per_thread;
420
420
  uint8_t *res_buffer;
421
421
  uint8_t *res_buffer_head;
422
422
  void **original_candidates_for_thread;
@@ -427,6 +427,9 @@ static void *build_exportable_model(
427
427
  uint32_t nb_taken_candidates;
428
428
  uint32_t aligned_model_size;
429
429
 
430
+ model_per_thread = malloc(sizeof(uint8_t*) * nb_runtime_threads);
431
+ model_size_per_thread = malloc(sizeof(uint32_t) * nb_runtime_threads);
432
+
430
433
  nb_taken_candidates = 0;
431
434
  *res_model_size = 0;
432
435
  for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
@@ -445,6 +448,8 @@ static void *build_exportable_model(
445
448
  {
446
449
  for (i = 0; i < i_thread; i++)
447
450
  free(model_per_thread[i]);
451
+ free(model_per_thread);
452
+ free(model_size_per_thread);
448
453
  return (NULL);
449
454
  }
450
455
  // align on next 4 byte boundary
@@ -459,6 +464,8 @@ static void *build_exportable_model(
459
464
  {
460
465
  for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
461
466
  free(model_per_thread[i_thread]);
467
+ free(model_per_thread);
468
+ free(model_size_per_thread);
462
469
  return (NULL);
463
470
  }
464
471
  *((uint32_t*)(res_buffer + sizeof(uint32_t) * 0)) = nb_runtime_threads;
@@ -478,6 +485,9 @@ static void *build_exportable_model(
478
485
  free(model_per_thread[i_thread]);
479
486
  }
480
487
 
488
+ free(model_per_thread);
489
+ free(model_size_per_thread);
490
+
481
491
  return (res_buffer);
482
492
  }
483
493
 
@@ -621,8 +631,8 @@ void bjw_free_runtime_model(void *runtime_model)
621
631
  uint32_t char_width;
622
632
  uint32_t char_access_width;
623
633
 
624
- char_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 2));
625
- char_access_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 3));
634
+ char_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 2));
635
+ char_access_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 3));
626
636
 
627
637
  void (*free_function)(void*) = NULL;
628
638
  if (char_width == 4 && char_access_width == 4)
@@ -720,18 +730,18 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
720
730
  uint32_t results_decal;
721
731
  t_thread_data *threads_data;
722
732
  #if BJW_USE_THREADS
723
- # ifdef _WIN32
733
+ # if defined(_WIN32) || defined(_WIN64)
724
734
  HANDLE *threads;
725
735
  # else
726
736
  pthread_t *threads;
727
737
  # endif
728
738
  #endif
729
739
 
730
- nb_runtime_threads = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 0));
731
- nb_candidates = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 1));
732
- char_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 2));
733
- char_access_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 3));
734
- original_char_width = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 4));
740
+ nb_runtime_threads = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 0));
741
+ nb_candidates = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 1));
742
+ char_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 2));
743
+ char_access_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 3));
744
+ original_char_width = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 4));
735
745
 
736
746
  // Characters after 256 won't be taken into consideration for score calculation anyway, and uint8_t won't be able to represent the indices.
737
747
  if (char_access_width == 1 && input_length >= 256)
@@ -743,7 +753,7 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
743
753
  n_best_results = nb_candidates;
744
754
 
745
755
  #if BJW_USE_THREADS
746
- # ifdef _WIN32
756
+ # if defined(_WIN32) || defined(_WIN64)
747
757
  if (!(threads = malloc(sizeof(HANDLE) * nb_runtime_threads)))
748
758
  return (NULL);
749
759
  # else
@@ -810,7 +820,7 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
810
820
  for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
811
821
  {
812
822
  threads_data[i_thread] = (t_thread_data){
813
- .runtime_models = runtime_model + sizeof(uint32_t) * 5,
823
+ .runtime_models = ((uint8_t*)runtime_model) + sizeof(uint32_t) * 5,
814
824
  .i_thread = i_thread,
815
825
  .original_char_width = original_char_width,
816
826
  .input = input,
@@ -824,7 +834,7 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
824
834
  };
825
835
 
826
836
  #if BJW_USE_THREADS
827
- # ifdef _WIN32
837
+ # if defined(_WIN32) || defined(_WIN64)
828
838
  threads[i_thread] = CreateThread(NULL, 0, runtime_function, &(threads_data[i_thread]), 0, NULL);
829
839
  # else
830
840
  pthread_create(&(threads[i_thread]), NULL, runtime_function, &(threads_data[i_thread]));
@@ -838,7 +848,7 @@ bjw_result *bjw_jaro_winkler_distance(void *runtime_model, void *input, uint32_t
838
848
  for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
839
849
  {
840
850
  #if BJW_USE_THREADS
841
- # ifdef _WIN32
851
+ # if defined(_WIN32) || defined(_WIN64)
842
852
  WaitForSingleObject(threads[i_thread], INFINITE);
843
853
  CloseHandle(threads[i_thread]);
844
854
  # else
File without changes
@@ -35,7 +35,7 @@ SOFTWARE.
35
35
  # include <math.h>
36
36
 
37
37
  # if BJW_USE_THREADS
38
- # ifdef _WIN32
38
+ # if defined(_WIN32) || defined(_WIN64)
39
39
  # include <windows.h>
40
40
  # else
41
41
  # include <pthread.h>
@@ -26,6 +26,10 @@ SOFTWARE.
26
26
  #define BJW_SUFFIX_HANDLER(name, type1, type2) BJW_SUFFIX_PASTER(name, type1, type2)
27
27
  #define BJW_SUFFIX(name) BJW_SUFFIX_HANDLER(name, BJW_CHAR_TYPE, BJW_CHAR_ACCESS_TYPE)
28
28
 
29
+ #if defined(_WIN32) || defined(_WIN64)
30
+ #define bzero(b,len) (memset((b), '\0', (len)), (void) 0)
31
+ #endif
32
+
29
33
  // this represents the data needed for a candidate when finding matches
30
34
  typedef struct
31
35
  {
@@ -212,8 +216,8 @@ static void BJW_SUFFIX(free_runtime_model)
212
216
  uint32_t i_thread;
213
217
  uint32_t nb_runtime_threads;
214
218
 
215
- nb_runtime_threads = *((uint32_t*)(runtime_model + sizeof(uint32_t) * 0));
216
- runtime_models = (BJW_SUFFIX(t_runtime_model)*)(runtime_model + sizeof(uint32_t) * 5);
219
+ nb_runtime_threads = *((uint32_t*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 0));
220
+ runtime_models = (BJW_SUFFIX(t_runtime_model)*)(((uint8_t*)runtime_model) + sizeof(uint32_t) * 5);
217
221
  for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
218
222
  BJW_SUFFIX(free_runtime_model_for_thread)(&(runtime_models[i_thread]));
219
223
  free(runtime_model);
@@ -230,12 +234,12 @@ static void *BJW_SUFFIX(build_runtime_model)
230
234
  res = malloc(sizeof(BJW_SUFFIX(t_runtime_model)) * nb_runtime_threads + sizeof(uint32_t) * 5);
231
235
  if (!res)
232
236
  return (NULL);
233
- *((uint32_t*)(res + sizeof(uint32_t) * 0)) = nb_runtime_threads;
234
- *((uint32_t*)(res + sizeof(uint32_t) * 1)) = nb_candidates;
235
- *((uint32_t*)(res + sizeof(uint32_t) * 2)) = sizeof(BJW_CHAR_TYPE);
236
- *((uint32_t*)(res + sizeof(uint32_t) * 3)) = sizeof(BJW_CHAR_ACCESS_TYPE);
237
- *((uint32_t*)(res + sizeof(uint32_t) * 4)) = original_char_width;
238
- runtime_models = res + sizeof(uint32_t) * 5;
237
+ *((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 0)) = nb_runtime_threads;
238
+ *((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 1)) = nb_candidates;
239
+ *((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 2)) = sizeof(BJW_CHAR_TYPE);
240
+ *((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 3)) = sizeof(BJW_CHAR_ACCESS_TYPE);
241
+ *((uint32_t*)((uint8_t*)res + sizeof(uint32_t) * 4)) = original_char_width;
242
+ runtime_models = (BJW_SUFFIX(t_runtime_model)*)((uint8_t*)res + sizeof(uint32_t) * 5);
239
243
 
240
244
  for (i_thread = 0; i_thread < nb_runtime_threads; i_thread++)
241
245
  {
@@ -426,7 +430,7 @@ static uint32_t BJW_SUFFIX(jaro_winkler_distance_from_flags)
426
430
  {
427
431
  if (candidate_min_score <= 0.0f)
428
432
  {
429
- results[nb_results].candidate = runtime_model->original_candidates + original_char_width * candidate_decal;
433
+ results[nb_results].candidate = ((uint8_t*)(runtime_model->original_candidates)) + original_char_width * candidate_decal;
430
434
  results[nb_results].score = 0.0f;
431
435
  results[nb_results].candidate_length = candidate_data->candidate_length;
432
436
  nb_results++;
@@ -458,7 +462,7 @@ static uint32_t BJW_SUFFIX(jaro_winkler_distance_from_flags)
458
462
  if (score < candidate_min_score)
459
463
  continue ;
460
464
 
461
- results[nb_results].candidate = runtime_model->original_candidates + original_char_width * candidate_decal;
465
+ results[nb_results].candidate = ((uint8_t*)(runtime_model->original_candidates)) + original_char_width * candidate_decal;
462
466
  results[nb_results].score = score;
463
467
  results[nb_results].candidate_length = candidate_data->candidate_length;
464
468
  nb_results++;
@@ -63,7 +63,7 @@ do {
63
63
  #endif
64
64
 
65
65
  /* a number of the hash functions use uint32_t, which isn't defined before VS2010 */
66
- #if defined(_WIN32)
66
+ #if defined(_WIN32) || defined(_WIN64)
67
67
  #if defined(_MSC_VER) && _MSC_VER >= 1600
68
68
  #include <stdint.h>
69
69
  #elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__)
File without changes
@@ -1,3 +1,3 @@
1
1
  module BatchJaroWinkler
2
- VERSION = '0.1.1'
2
+ VERSION = '0.1.3'
3
3
  end
File without changes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: batch_jaro_winkler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dominik Bousquet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-04-24 00:00:00.000000000 Z
11
+ date: 2023-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.12.0
19
+ version: 1.16.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.12.0
26
+ version: 1.16.0
27
27
  description: This project gets its performance from the pre-calculation of an optimized
28
28
  model in advance of the actual runtime calculations. Supports any encoding.
29
29
  email: bousquet.dominik@gmail.com
@@ -62,7 +62,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
62
62
  - !ruby/object:Gem::Version
63
63
  version: '0'
64
64
  requirements: []
65
- rubygems_version: 3.1.4
65
+ rubygems_version: 3.4.6
66
66
  signing_key:
67
67
  specification_version: 4
68
68
  summary: Fast batch jaro winkler distance implementation in C99.