kanayago 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +12 -0
  3. data/.ruby-version +1 -0
  4. data/README.md +20 -29
  5. data/Rakefile +43 -96
  6. data/ext/kanayago/extconf.rb +6 -0
  7. data/ext/kanayago/id.h +12 -5
  8. data/ext/kanayago/id_table.h +15 -0
  9. data/ext/kanayago/include/ruby/st.h +199 -0
  10. data/ext/kanayago/internal/array.h +3 -0
  11. data/ext/kanayago/internal/basic_operators.h +1 -0
  12. data/ext/kanayago/internal/bignum.h +1 -0
  13. data/ext/kanayago/internal/bits.h +82 -0
  14. data/ext/kanayago/internal/encoding.h +4 -1
  15. data/ext/kanayago/internal/error.h +33 -0
  16. data/ext/kanayago/internal/fixnum.h +1 -0
  17. data/ext/kanayago/internal/gc.h +47 -11
  18. data/ext/kanayago/internal/hash.h +3 -0
  19. data/ext/kanayago/internal/imemo.h +93 -32
  20. data/ext/kanayago/internal/io.h +30 -7
  21. data/ext/kanayago/internal/namespace.h +81 -0
  22. data/ext/kanayago/internal/numeric.h +1 -0
  23. data/ext/kanayago/internal/parse.h +17 -3
  24. data/ext/kanayago/internal/re.h +7 -2
  25. data/ext/kanayago/internal/sanitizers.h +88 -39
  26. data/ext/kanayago/internal/set_table.h +70 -0
  27. data/ext/kanayago/internal/string.h +33 -16
  28. data/ext/kanayago/internal/symbol.h +4 -3
  29. data/ext/kanayago/internal/thread.h +42 -9
  30. data/ext/kanayago/internal/variable.h +13 -11
  31. data/ext/kanayago/internal/vm.h +4 -5
  32. data/ext/kanayago/internal.h +0 -3
  33. data/ext/kanayago/kanayago.c +554 -235
  34. data/ext/kanayago/kanayago.h +5 -0
  35. data/ext/kanayago/literal_node.c +343 -0
  36. data/ext/kanayago/literal_node.h +30 -0
  37. data/ext/kanayago/method.h +18 -2
  38. data/ext/kanayago/node.c +7 -1
  39. data/ext/kanayago/node.h +14 -3
  40. data/ext/kanayago/parse.c +7602 -7156
  41. data/ext/kanayago/parse.h +39 -39
  42. data/ext/kanayago/parser_st.c +2 -1
  43. data/ext/kanayago/pattern_node.c +78 -0
  44. data/ext/kanayago/pattern_node.h +13 -0
  45. data/ext/kanayago/ruby_atomic.h +43 -0
  46. data/ext/kanayago/ruby_parser.c +7 -35
  47. data/ext/kanayago/rubyparser.h +83 -80
  48. data/ext/kanayago/scope_node.c +34 -0
  49. data/ext/kanayago/scope_node.h +8 -0
  50. data/ext/kanayago/shape.h +321 -111
  51. data/ext/kanayago/st.c +905 -21
  52. data/ext/kanayago/statement_node.c +795 -0
  53. data/ext/kanayago/statement_node.h +66 -0
  54. data/ext/kanayago/string_node.c +192 -0
  55. data/ext/kanayago/string_node.h +19 -0
  56. data/ext/kanayago/symbol.h +2 -9
  57. data/ext/kanayago/thread_pthread.h +10 -3
  58. data/ext/kanayago/universal_parser.c +1 -20
  59. data/ext/kanayago/variable_node.c +72 -0
  60. data/ext/kanayago/variable_node.h +12 -0
  61. data/ext/kanayago/vm_core.h +205 -71
  62. data/lib/kanayago/literal_node.rb +87 -0
  63. data/lib/kanayago/pattern_node.rb +19 -0
  64. data/lib/kanayago/statement_node.rb +222 -0
  65. data/lib/kanayago/string_node.rb +43 -0
  66. data/lib/kanayago/variable_node.rb +23 -0
  67. data/lib/kanayago/version.rb +1 -1
  68. data/lib/kanayago.rb +22 -0
  69. data/patch/3.4/copy_target.rb +78 -0
  70. data/patch/3.4/kanayago.patch +162 -0
  71. data/patch/head/copy_target.rb +84 -0
  72. data/patch/head/kanayago.patch +162 -0
  73. data/sample/minitest_generator.rb +266 -0
  74. data/sample/test_generator.rb +272 -0
  75. data/typeprof.conf.json +9 -0
  76. metadata +32 -4
  77. data/ext/kanayago/parse.tmp.y +0 -16145
data/ext/kanayago/st.c CHANGED
@@ -103,12 +103,15 @@
103
103
  #ifdef NOT_RUBY
104
104
  #include "regint.h"
105
105
  #include "st.h"
106
+ #include <assert.h>
106
107
  #elif defined RUBY_EXPORT
107
108
  #include "internal.h"
108
109
  #include "internal/bits.h"
109
110
  #include "internal/hash.h"
110
111
  #include "internal/sanitizers.h"
112
+ #include "internal/set_table.h"
111
113
  #include "internal/st.h"
114
+ #include "ruby_assert.h"
112
115
  #endif
113
116
 
114
117
  #include <stdio.h>
@@ -116,7 +119,6 @@
116
119
  #include <stdlib.h>
117
120
  #endif
118
121
  #include <string.h>
119
- #include <assert.h>
120
122
 
121
123
  #ifdef __GNUC__
122
124
  #define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
@@ -314,17 +316,22 @@ static const struct st_features features[] = {
314
316
  #define RESERVED_HASH_VAL (~(st_hash_t) 0)
315
317
  #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
316
318
 
317
- /* Return hash value of KEY for table TAB. */
318
319
  static inline st_hash_t
319
- do_hash(st_data_t key, st_table *tab)
320
+ normalize_hash_value(st_hash_t hash)
320
321
  {
321
- st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
322
-
323
322
  /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
324
323
  another value. Such mapping should be extremely rare. */
325
324
  return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
326
325
  }
327
326
 
327
+ /* Return hash value of KEY for table TAB. */
328
+ static inline st_hash_t
329
+ do_hash(st_data_t key, st_table *tab)
330
+ {
331
+ st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
332
+ return normalize_hash_value(hash);
333
+ }
334
+
328
335
  /* Power of 2 defining the minimal number of allocated entries. */
329
336
  #define MINIMAL_POWER2 2
330
337
 
@@ -741,7 +748,7 @@ rebuild_table(st_table *tab)
741
748
  else {
742
749
  st_table *new_tab;
743
750
  /* This allocation could trigger GC and compaction. If tab is the
744
- * gen_iv_tbl, then tab could have changed in size due to objects being
751
+ * gen_fields_tbl, then tab could have changed in size due to objects being
745
752
  * freed and/or moved. Do not store attributes of tab before this line. */
746
753
  new_tab = st_init_table_with_size(tab->type,
747
754
  2 * tab->num_entries - 1);
@@ -784,6 +791,8 @@ rebuild_table_with(st_table *const new_tab, st_table *const tab)
784
791
  new_tab->num_entries++;
785
792
  ni++;
786
793
  }
794
+
795
+ assert(new_tab->num_entries == tab->num_entries);
787
796
  }
788
797
 
789
798
  static void
@@ -1173,6 +1182,8 @@ st_add_direct_with_hash(st_table *tab,
1173
1182
  st_index_t ind;
1174
1183
  st_index_t bin_ind;
1175
1184
 
1185
+ assert(hash != RESERVED_HASH_VAL);
1186
+
1176
1187
  rebuild_table_if_necessary(tab);
1177
1188
  ind = tab->entries_bound++;
1178
1189
  entry = &tab->entries[ind];
@@ -1190,7 +1201,7 @@ void
1190
1201
  rb_st_add_direct_with_hash(st_table *tab,
1191
1202
  st_data_t key, st_data_t value, st_hash_t hash)
1192
1203
  {
1193
- st_add_direct_with_hash(tab, key, value, hash);
1204
+ st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1194
1205
  }
1195
1206
 
1196
1207
  /* Insert (KEY, VALUE) into table TAB. The table should not have
@@ -1484,7 +1495,16 @@ st_update(st_table *tab, st_data_t key,
1484
1495
  value = entry->record;
1485
1496
  }
1486
1497
  old_key = key;
1498
+
1499
+ unsigned int rebuilds_num = tab->rebuilds_num;
1500
+
1487
1501
  retval = (*func)(&key, &value, arg, existing);
1502
+
1503
+ // We need to make sure that the callback didn't cause a table rebuild
1504
+ // Ideally we would make sure no operations happened
1505
+ assert(rebuilds_num == tab->rebuilds_num);
1506
+ (void)rebuilds_num;
1507
+
1488
1508
  switch (retval) {
1489
1509
  case ST_CONTINUE:
1490
1510
  if (! existing) {
@@ -2309,20 +2329,6 @@ rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2309
2329
  st_insert_generic(tab, argc, argv, hash);
2310
2330
  }
2311
2331
 
2312
- // to iterate iv_index_tbl
2313
- st_data_t
2314
- rb_st_nth_key(st_table *tab, st_index_t index)
2315
- {
2316
- if (LIKELY(tab->entries_start == 0 &&
2317
- tab->num_entries == tab->entries_bound &&
2318
- index < tab->num_entries)) {
2319
- return tab->entries[index].key;
2320
- }
2321
- else {
2322
- rb_bug("unreachable");
2323
- }
2324
- }
2325
-
2326
2332
  void
2327
2333
  rb_st_compact_table(st_table *tab)
2328
2334
  {
@@ -2336,4 +2342,882 @@ rb_st_compact_table(st_table *tab)
2336
2342
  }
2337
2343
  }
2338
2344
 
2345
+ /*
2346
+ * set_table related code
2347
+ */
2348
+
2349
+ struct set_table_entry {
2350
+ st_hash_t hash;
2351
+ st_data_t key;
2352
+ };
2353
+
2354
+ /* Return hash value of KEY for table TAB. */
2355
+ static inline st_hash_t
2356
+ set_do_hash(st_data_t key, set_table *tab)
2357
+ {
2358
+ st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
2359
+ return normalize_hash_value(hash);
2360
+ }
2361
+
2362
+ /* Return bin size index of table TAB. */
2363
+ static inline unsigned int
2364
+ set_get_size_ind(const set_table *tab)
2365
+ {
2366
+ return tab->size_ind;
2367
+ }
2368
+
2369
+ /* Return the number of allocated bins of table TAB. */
2370
+ static inline st_index_t
2371
+ set_get_bins_num(const set_table *tab)
2372
+ {
2373
+ return ((st_index_t) 1)<<tab->bin_power;
2374
+ }
2375
+
2376
+ /* Return mask for a bin index in table TAB. */
2377
+ static inline st_index_t
2378
+ set_bins_mask(const set_table *tab)
2379
+ {
2380
+ return set_get_bins_num(tab) - 1;
2381
+ }
2382
+
2383
+ /* Return the index of table TAB bin corresponding to
2384
+ HASH_VALUE. */
2385
+ static inline st_index_t
2386
+ set_hash_bin(st_hash_t hash_value, set_table *tab)
2387
+ {
2388
+ return hash_value & set_bins_mask(tab);
2389
+ }
2390
+
2391
+ /* Return the number of allocated entries of table TAB. */
2392
+ static inline st_index_t
2393
+ set_get_allocated_entries(const set_table *tab)
2394
+ {
2395
+ return ((st_index_t) 1)<<tab->entry_power;
2396
+ }
2397
+
2398
+ static inline size_t
2399
+ set_allocated_entries_size(const set_table *tab)
2400
+ {
2401
+ return set_get_allocated_entries(tab) * sizeof(set_table_entry);
2402
+ }
2403
+
2404
+ static inline bool
2405
+ set_has_bins(const set_table *tab)
2406
+ {
2407
+ return tab->entry_power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS;
2408
+ }
2409
+
2410
+ /* Return size of the allocated bins of table TAB. */
2411
+ static inline st_index_t
2412
+ set_bins_size(const set_table *tab)
2413
+ {
2414
+ if (set_has_bins(tab)) {
2415
+ return features[tab->entry_power].bins_words * sizeof (st_index_t);
2416
+ }
2417
+
2418
+ return 0;
2419
+ }
2420
+
2421
+ static inline st_index_t *
2422
+ set_bins_ptr(const set_table *tab)
2423
+ {
2424
+ if (set_has_bins(tab)) {
2425
+ return (st_index_t *)(((char *)tab->entries) + set_allocated_entries_size(tab));
2426
+ }
2427
+
2428
+ return NULL;
2429
+ }
2430
+
2431
+ /* Mark all bins of table TAB as empty. */
2432
+ static void
2433
+ set_initialize_bins(set_table *tab)
2434
+ {
2435
+ memset(set_bins_ptr(tab), 0, set_bins_size(tab));
2436
+ }
2437
+
2438
+ /* Make table TAB empty. */
2439
+ static void
2440
+ set_make_tab_empty(set_table *tab)
2441
+ {
2442
+ tab->num_entries = 0;
2443
+ tab->entries_start = tab->entries_bound = 0;
2444
+ if (set_bins_ptr(tab) != NULL)
2445
+ set_initialize_bins(tab);
2446
+ }
2447
+
2448
+ static set_table *
2449
+ set_init_existing_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2450
+ {
2451
+ int n;
2452
+
2453
+ #ifdef HASH_LOG
2454
+ #if HASH_LOG+0 < 0
2455
+ {
2456
+ const char *e = getenv("ST_HASH_LOG");
2457
+ if (!e || !*e) init_st = 1;
2458
+ }
2459
+ #endif
2460
+ if (init_st == 0) {
2461
+ init_st = 1;
2462
+ atexit(stat_col);
2463
+ }
2464
+ #endif
2465
+
2466
+ n = get_power2(size);
2467
+
2468
+ tab->type = type;
2469
+ tab->entry_power = n;
2470
+ tab->bin_power = features[n].bin_power;
2471
+ tab->size_ind = features[n].size_ind;
2472
+
2473
+ size_t memsize = 0;
2474
+ if (set_has_bins(tab)) {
2475
+ memsize += set_bins_size(tab);
2476
+ }
2477
+ memsize += set_get_allocated_entries(tab) * sizeof(set_table_entry);
2478
+ tab->entries = (set_table_entry *)malloc(memsize);
2479
+ set_make_tab_empty(tab);
2480
+ tab->rebuilds_num = 0;
2481
+ return tab;
2482
+ }
2483
+
2484
+ /* Create and return table with TYPE which can hold at least SIZE
2485
+ entries. The real number of entries which the table can hold is
2486
+ the nearest power of two for SIZE. */
2487
+ set_table *
2488
+ set_init_table_with_size(set_table *tab, const struct st_hash_type *type, st_index_t size)
2489
+ {
2490
+ if (tab == NULL) tab = malloc(sizeof(set_table));
2491
+
2492
+ set_init_existing_table_with_size(tab, type, size);
2493
+
2494
+ return tab;
2495
+ }
2496
+
2497
+ set_table *
2498
+ set_init_numtable(void)
2499
+ {
2500
+ return set_init_table_with_size(NULL, &type_numhash, 0);
2501
+ }
2502
+
2503
+ set_table *
2504
+ set_init_numtable_with_size(st_index_t size)
2505
+ {
2506
+ return set_init_table_with_size(NULL, &type_numhash, size);
2507
+ }
2508
+
2509
+ size_t
2510
+ set_table_size(const struct set_table *tbl)
2511
+ {
2512
+ return tbl->num_entries;
2513
+ }
2514
+
2515
+ /* Make table TAB empty. */
2516
+ void
2517
+ set_table_clear(set_table *tab)
2518
+ {
2519
+ set_make_tab_empty(tab);
2520
+ tab->rebuilds_num++;
2521
+ }
2522
+
2523
+ /* Free table TAB space. This should only be used if you passed NULL to
2524
+ set_init_table_with_size/set_copy when creating the table. */
2525
+ void
2526
+ set_free_table(set_table *tab)
2527
+ {
2528
+ free(tab->entries);
2529
+ free(tab);
2530
+ }
2531
+
2532
+ /* Return byte size of memory allocated for table TAB. */
2533
+ size_t
2534
+ set_memsize(const set_table *tab)
2535
+ {
2536
+ return(sizeof(set_table)
2537
+ + (tab->entry_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS ? 0 : set_bins_size(tab))
2538
+ + set_get_allocated_entries(tab) * sizeof(set_table_entry));
2539
+ }
2540
+
2541
+ static st_index_t
2542
+ set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2543
+
2544
+ static st_index_t
2545
+ set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key);
2546
+
2547
+ static st_index_t
2548
+ set_find_table_bin_ind_direct(set_table *table, st_hash_t hash_value, st_data_t key);
2549
+
2550
+ static st_index_t
2551
+ set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2552
+ st_data_t key, st_index_t *bin_ind);
2553
+
2554
+ static void set_rebuild_table_with(set_table *const new_tab, set_table *const tab);
2555
+ static void set_rebuild_move_table(set_table *const new_tab, set_table *const tab);
2556
+ static void set_rebuild_cleanup(set_table *const tab);
2557
+
2558
+ /* Rebuild table TAB. Rebuilding removes all deleted bins and entries
2559
+ and can change size of the table entries and bins arrays.
2560
+ Rebuilding is implemented by creation of a new table or by
2561
+ compaction of the existing one. */
2562
+ static void
2563
+ set_rebuild_table(set_table *tab)
2564
+ {
2565
+ if ((2 * tab->num_entries <= set_get_allocated_entries(tab)
2566
+ && REBUILD_THRESHOLD * tab->num_entries > set_get_allocated_entries(tab))
2567
+ || tab->num_entries < (1 << MINIMAL_POWER2)) {
2568
+ /* Compaction: */
2569
+ tab->num_entries = 0;
2570
+ if (set_has_bins(tab))
2571
+ set_initialize_bins(tab);
2572
+ set_rebuild_table_with(tab, tab);
2573
+ }
2574
+ else {
2575
+ set_table *new_tab;
2576
+ /* This allocation could trigger GC and compaction. If tab is the
2577
+ * gen_fields_tbl, then tab could have changed in size due to objects being
2578
+ * freed and/or moved. Do not store attributes of tab before this line. */
2579
+ new_tab = set_init_table_with_size(NULL, tab->type,
2580
+ 2 * tab->num_entries - 1);
2581
+ set_rebuild_table_with(new_tab, tab);
2582
+ set_rebuild_move_table(new_tab, tab);
2583
+ }
2584
+ set_rebuild_cleanup(tab);
2585
+ }
2586
+
2587
+ static void
2588
+ set_rebuild_table_with(set_table *const new_tab, set_table *const tab)
2589
+ {
2590
+ st_index_t i, ni;
2591
+ unsigned int size_ind;
2592
+ set_table_entry *new_entries;
2593
+ set_table_entry *curr_entry_ptr;
2594
+ st_index_t *bins;
2595
+ st_index_t bin_ind;
2596
+
2597
+ new_entries = new_tab->entries;
2598
+
2599
+ ni = 0;
2600
+ bins = set_bins_ptr(new_tab);
2601
+ size_ind = set_get_size_ind(new_tab);
2602
+ st_index_t bound = tab->entries_bound;
2603
+ set_table_entry *entries = tab->entries;
2604
+
2605
+ for (i = tab->entries_start; i < bound; i++) {
2606
+ curr_entry_ptr = &entries[i];
2607
+ PREFETCH(entries + i + 1, 0);
2608
+ if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
2609
+ continue;
2610
+ if (&new_entries[ni] != curr_entry_ptr)
2611
+ new_entries[ni] = *curr_entry_ptr;
2612
+ if (EXPECT(bins != NULL, 1)) {
2613
+ bin_ind = set_find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
2614
+ curr_entry_ptr->key);
2615
+ set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
2616
+ }
2617
+ new_tab->num_entries++;
2618
+ ni++;
2619
+ }
2620
+
2621
+ assert(new_tab->num_entries == tab->num_entries);
2622
+ }
2623
+
2624
+ static void
2625
+ set_rebuild_move_table(set_table *const new_tab, set_table *const tab)
2626
+ {
2627
+ tab->entry_power = new_tab->entry_power;
2628
+ tab->bin_power = new_tab->bin_power;
2629
+ tab->size_ind = new_tab->size_ind;
2630
+ free(tab->entries);
2631
+ tab->entries = new_tab->entries;
2632
+ free(new_tab);
2633
+ }
2634
+
2635
+ static void
2636
+ set_rebuild_cleanup(set_table *const tab)
2637
+ {
2638
+ tab->entries_start = 0;
2639
+ tab->entries_bound = tab->num_entries;
2640
+ tab->rebuilds_num++;
2641
+ }
2642
+
2643
+ /* Return the next secondary hash index for table TAB using previous
2644
+ index IND and PERTURB. Finally modulo of the function becomes a
2645
+ full *cycle linear congruential generator*, in other words it
2646
+ guarantees traversing all table bins in extreme case.
2647
+
2648
+ According to the Hull-Dobell theorem a generator
2649
+ "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
2650
+ o m and c are relatively prime
2651
+ o a-1 is divisible by all prime factors of m
2652
+ o a-1 is divisible by 4 if m is divisible by 4.
2653
+
2654
+ For our case a is 5, c is 1, and m is a power of two. */
2655
+ static inline st_index_t
2656
+ set_secondary_hash(st_index_t ind, set_table *tab, st_index_t *perturb)
2657
+ {
2658
+ *perturb >>= 11;
2659
+ ind = (ind << 2) + ind + *perturb + 1;
2660
+ return set_hash_bin(ind, tab);
2661
+ }
2662
+
2663
+ /* Find an entry with HASH_VALUE and KEY in TABLE using a linear
2664
+ search. Return the index of the found entry in array `entries`.
2665
+ If it is not found, return UNDEFINED_ENTRY_IND. If the table was
2666
+ rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2667
+ static inline st_index_t
2668
+ set_find_entry(set_table *tab, st_hash_t hash_value, st_data_t key)
2669
+ {
2670
+ int eq_p, rebuilt_p;
2671
+ st_index_t i, bound;
2672
+ set_table_entry *entries;
2673
+
2674
+ bound = tab->entries_bound;
2675
+ entries = tab->entries;
2676
+ for (i = tab->entries_start; i < bound; i++) {
2677
+ DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
2678
+ if (EXPECT(rebuilt_p, 0))
2679
+ return REBUILT_TABLE_ENTRY_IND;
2680
+ if (eq_p)
2681
+ return i;
2682
+ }
2683
+ return UNDEFINED_ENTRY_IND;
2684
+ }
2685
+
2686
+ /* Use the quadratic probing. The method has a better data locality
2687
+ but more collisions than the current approach. In average it
2688
+ results in a bit slower search. */
2689
+ /*#define QUADRATIC_PROBE*/
2690
+
2691
+ /* Return index of entry with HASH_VALUE and KEY in table TAB. If
2692
+ there is no such entry, return UNDEFINED_ENTRY_IND. If the table
2693
+ was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
2694
+ static st_index_t
2695
+ set_find_table_entry_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2696
+ {
2697
+ int eq_p, rebuilt_p;
2698
+ st_index_t ind;
2699
+ #ifdef QUADRATIC_PROBE
2700
+ st_index_t d;
2701
+ #else
2702
+ st_index_t perturb;
2703
+ #endif
2704
+ st_index_t bin;
2705
+ set_table_entry *entries = tab->entries;
2706
+
2707
+ ind = set_hash_bin(hash_value, tab);
2708
+ #ifdef QUADRATIC_PROBE
2709
+ d = 1;
2710
+ #else
2711
+ perturb = hash_value;
2712
+ #endif
2713
+ for (;;) {
2714
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2715
+ if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2716
+ DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2717
+ if (EXPECT(rebuilt_p, 0))
2718
+ return REBUILT_TABLE_ENTRY_IND;
2719
+ if (eq_p)
2720
+ break;
2721
+ }
2722
+ else if (EMPTY_BIN_P(bin))
2723
+ return UNDEFINED_ENTRY_IND;
2724
+ #ifdef QUADRATIC_PROBE
2725
+ ind = set_hash_bin(ind + d, tab);
2726
+ d++;
2727
+ #else
2728
+ ind = set_secondary_hash(ind, tab, &perturb);
2729
+ #endif
2730
+ }
2731
+ return bin;
2732
+ }
2733
+
2734
+ /* Find and return index of table TAB bin corresponding to an entry
2735
+ with HASH_VALUE and KEY. If there is no such bin, return
2736
+ UNDEFINED_BIN_IND. If the table was rebuilt during the search,
2737
+ return REBUILT_TABLE_BIN_IND. */
2738
+ static st_index_t
2739
+ set_find_table_bin_ind(set_table *tab, st_hash_t hash_value, st_data_t key)
2740
+ {
2741
+ int eq_p, rebuilt_p;
2742
+ st_index_t ind;
2743
+ #ifdef QUADRATIC_PROBE
2744
+ st_index_t d;
2745
+ #else
2746
+ st_index_t perturb;
2747
+ #endif
2748
+ st_index_t bin;
2749
+ set_table_entry *entries = tab->entries;
2750
+
2751
+ ind = set_hash_bin(hash_value, tab);
2752
+ #ifdef QUADRATIC_PROBE
2753
+ d = 1;
2754
+ #else
2755
+ perturb = hash_value;
2756
+ #endif
2757
+ for (;;) {
2758
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2759
+ if (! EMPTY_OR_DELETED_BIN_P(bin)) {
2760
+ DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
2761
+ if (EXPECT(rebuilt_p, 0))
2762
+ return REBUILT_TABLE_BIN_IND;
2763
+ if (eq_p)
2764
+ break;
2765
+ }
2766
+ else if (EMPTY_BIN_P(bin))
2767
+ return UNDEFINED_BIN_IND;
2768
+ #ifdef QUADRATIC_PROBE
2769
+ ind = set_hash_bin(ind + d, tab);
2770
+ d++;
2771
+ #else
2772
+ ind = set_secondary_hash(ind, tab, &perturb);
2773
+ #endif
2774
+ }
2775
+ return ind;
2776
+ }
2777
+
2778
+ /* Find and return index of table TAB bin corresponding to an entry
2779
+ with HASH_VALUE and KEY. The entry should be in the table
2780
+ already. */
2781
+ static st_index_t
2782
+ set_find_table_bin_ind_direct(set_table *tab, st_hash_t hash_value, st_data_t key)
2783
+ {
2784
+ st_index_t ind;
2785
+ #ifdef QUADRATIC_PROBE
2786
+ st_index_t d;
2787
+ #else
2788
+ st_index_t perturb;
2789
+ #endif
2790
+ st_index_t bin;
2791
+
2792
+ ind = set_hash_bin(hash_value, tab);
2793
+ #ifdef QUADRATIC_PROBE
2794
+ d = 1;
2795
+ #else
2796
+ perturb = hash_value;
2797
+ #endif
2798
+ for (;;) {
2799
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2800
+ if (EMPTY_OR_DELETED_BIN_P(bin))
2801
+ return ind;
2802
+ #ifdef QUADRATIC_PROBE
2803
+ ind = set_hash_bin(ind + d, tab);
2804
+ d++;
2805
+ #else
2806
+ ind = set_secondary_hash(ind, tab, &perturb);
2807
+ #endif
2808
+ }
2809
+ }
2810
+
2811
+ /* Mark I-th bin of table TAB as empty, in other words not
2812
+ corresponding to any entry. */
2813
+ #define MARK_SET_BIN_EMPTY(tab, i) (set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, EMPTY_BIN))
2814
+
2815
+ /* Return index of table TAB bin for HASH_VALUE and KEY through
2816
+ BIN_IND and the pointed value as the function result. Reserve the
2817
+ bin for inclusion of the corresponding entry into the table if it
2818
+ is not there yet. We always find such a bin as the bins array is
2819
+ bigger than the entries array. Although we can reuse a deleted bin, the
2820
+ result bin value is always empty if the table has no entry with
2821
+ KEY. Return the entries array index of the found entry or
2822
+ UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
2823
+ during the search, return REBUILT_TABLE_ENTRY_IND. */
2824
+ static st_index_t
2825
+ set_find_table_bin_ptr_and_reserve(set_table *tab, st_hash_t *hash_value,
2826
+ st_data_t key, st_index_t *bin_ind)
2827
+ {
2828
+ int eq_p, rebuilt_p;
2829
+ st_index_t ind;
2830
+ st_hash_t curr_hash_value = *hash_value;
2831
+ #ifdef QUADRATIC_PROBE
2832
+ st_index_t d;
2833
+ #else
2834
+ st_index_t perturb;
2835
+ #endif
2836
+ st_index_t entry_index;
2837
+ st_index_t firset_deleted_bin_ind;
2838
+ set_table_entry *entries;
2839
+
2840
+ ind = set_hash_bin(curr_hash_value, tab);
2841
+ #ifdef QUADRATIC_PROBE
2842
+ d = 1;
2843
+ #else
2844
+ perturb = curr_hash_value;
2845
+ #endif
2846
+ firset_deleted_bin_ind = UNDEFINED_BIN_IND;
2847
+ entries = tab->entries;
2848
+ for (;;) {
2849
+ entry_index = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), ind);
2850
+ if (EMPTY_BIN_P(entry_index)) {
2851
+ tab->num_entries++;
2852
+ entry_index = UNDEFINED_ENTRY_IND;
2853
+ if (firset_deleted_bin_ind != UNDEFINED_BIN_IND) {
2854
+ /* We can reuse bin of a deleted entry. */
2855
+ ind = firset_deleted_bin_ind;
2856
+ MARK_SET_BIN_EMPTY(tab, ind);
2857
+ }
2858
+ break;
2859
+ }
2860
+ else if (! DELETED_BIN_P(entry_index)) {
2861
+ DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
2862
+ if (EXPECT(rebuilt_p, 0))
2863
+ return REBUILT_TABLE_ENTRY_IND;
2864
+ if (eq_p)
2865
+ break;
2866
+ }
2867
+ else if (firset_deleted_bin_ind == UNDEFINED_BIN_IND)
2868
+ firset_deleted_bin_ind = ind;
2869
+ #ifdef QUADRATIC_PROBE
2870
+ ind = set_hash_bin(ind + d, tab);
2871
+ d++;
2872
+ #else
2873
+ ind = set_secondary_hash(ind, tab, &perturb);
2874
+ #endif
2875
+ }
2876
+ *bin_ind = ind;
2877
+ return entry_index;
2878
+ }
2879
+
2880
+ /* Find an entry with KEY in table TAB. Return non-zero if we found
2881
+ it. */
2882
+ int
2883
+ set_table_lookup(set_table *tab, st_data_t key)
2884
+ {
2885
+ st_index_t bin;
2886
+ st_hash_t hash = set_do_hash(key, tab);
2887
+
2888
+ retry:
2889
+ if (!set_has_bins(tab)) {
2890
+ bin = set_find_entry(tab, hash, key);
2891
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2892
+ goto retry;
2893
+ if (bin == UNDEFINED_ENTRY_IND)
2894
+ return 0;
2895
+ }
2896
+ else {
2897
+ bin = set_find_table_entry_ind(tab, hash, key);
2898
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2899
+ goto retry;
2900
+ if (bin == UNDEFINED_ENTRY_IND)
2901
+ return 0;
2902
+ bin -= ENTRY_BASE;
2903
+ }
2904
+ return 1;
2905
+ }
2906
+
2907
+ /* Check the table and rebuild it if it is necessary. */
2908
+ static inline void
2909
+ set_rebuild_table_if_necessary (set_table *tab)
2910
+ {
2911
+ st_index_t bound = tab->entries_bound;
2912
+
2913
+ if (bound == set_get_allocated_entries(tab))
2914
+ set_rebuild_table(tab);
2915
+ }
2916
+
2917
+ /* Insert KEY into table TAB and return zero. If there is
2918
+ already an entry with KEY in the table, return nonzero without
2919
+ adding a new entry. */
2920
+ int
2921
+ set_insert(set_table *tab, st_data_t key)
2922
+ {
2923
+ set_table_entry *entry;
2924
+ st_index_t bin;
2925
+ st_index_t ind;
2926
+ st_hash_t hash_value;
2927
+ st_index_t bin_ind;
2928
+ int new_p;
2929
+
2930
+ hash_value = set_do_hash(key, tab);
2931
+ retry:
2932
+ set_rebuild_table_if_necessary(tab);
2933
+ if (!set_has_bins(tab)) {
2934
+ bin = set_find_entry(tab, hash_value, key);
2935
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2936
+ goto retry;
2937
+ new_p = bin == UNDEFINED_ENTRY_IND;
2938
+ if (new_p)
2939
+ tab->num_entries++;
2940
+ bin_ind = UNDEFINED_BIN_IND;
2941
+ }
2942
+ else {
2943
+ bin = set_find_table_bin_ptr_and_reserve(tab, &hash_value,
2944
+ key, &bin_ind);
2945
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
2946
+ goto retry;
2947
+ new_p = bin == UNDEFINED_ENTRY_IND;
2948
+ bin -= ENTRY_BASE;
2949
+ }
2950
+ if (new_p) {
2951
+ ind = tab->entries_bound++;
2952
+ entry = &tab->entries[ind];
2953
+ entry->hash = hash_value;
2954
+ entry->key = key;
2955
+ if (bin_ind != UNDEFINED_BIN_IND)
2956
+ set_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
2957
+ return 0;
2958
+ }
2959
+ return 1;
2960
+ }
2961
+
2962
+ /* Create a copy of old_tab into new_tab. */
2963
+ static set_table *
2964
+ set_replace(set_table *new_tab, set_table *old_tab)
2965
+ {
2966
+ *new_tab = *old_tab;
2967
+ size_t memsize = set_allocated_entries_size(old_tab) + set_bins_size(old_tab);
2968
+ new_tab->entries = (set_table_entry *)malloc(memsize);
2969
+ MEMCPY(new_tab->entries, old_tab->entries, char, memsize);
2970
+ return new_tab;
2971
+ }
2972
+
2973
+ /* Create and return a copy of table OLD_TAB. */
2974
+ set_table *
2975
+ set_copy(set_table *new_tab, set_table *old_tab)
2976
+ {
2977
+ if (new_tab == NULL) new_tab = (set_table *) malloc(sizeof(set_table));
2978
+
2979
+ if (set_replace(new_tab, old_tab) == NULL) {
2980
+ set_free_table(new_tab);
2981
+ return NULL;
2982
+ }
2983
+
2984
+ return new_tab;
2985
+ }
2986
+
2987
+ /* Update the entries start of table TAB after removing an entry
2988
+ with index N in the array entries. */
2989
+ static inline void
2990
+ set_update_range_for_deleted(set_table *tab, st_index_t n)
2991
+ {
2992
+ /* Do not update entries_bound here. Otherwise, we can fill all
2993
+ bins by deleted entry value before rebuilding the table. */
2994
+ if (tab->entries_start == n) {
2995
+ st_index_t start = n + 1;
2996
+ st_index_t bound = tab->entries_bound;
2997
+ set_table_entry *entries = tab->entries;
2998
+ while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
2999
+ tab->entries_start = start;
3000
+ }
3001
+ }
3002
+
3003
+ /* Mark I-th bin of table TAB as corresponding to a deleted table
3004
+ entry. Only the bin is rewritten; the number of entries in the
3005
+ table is not touched here. */
3006
+ #define MARK_SET_BIN_DELETED(tab, i) \
3007
+ do { \
3008
+ set_bin(set_bins_ptr(tab), set_get_size_ind(tab), i, DELETED_BIN); \
3009
+ } while (0)
3010
+
3011
+ /* Delete entry with KEY from table TAB, and return non-zero. If
3012
+ there is no entry with KEY in the table, return zero. */
3013
+ int
3014
+ set_table_delete(set_table *tab, st_data_t *key)
3015
+ {
3016
+ set_table_entry *entry;
3017
+ st_index_t bin;
3018
+ st_index_t bin_ind;
3019
+ st_hash_t hash;
3020
+
3021
+ hash = set_do_hash(*key, tab);
3022
+ retry:
3023
+ if (!set_has_bins(tab)) {
3024
+ bin = set_find_entry(tab, hash, *key);
3025
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3026
+ goto retry;
3027
+ if (bin == UNDEFINED_ENTRY_IND) {
3028
+ return 0;
3029
+ }
3030
+ }
3031
+ else {
3032
+ bin_ind = set_find_table_bin_ind(tab, hash, *key);
3033
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3034
+ goto retry;
3035
+ if (bin_ind == UNDEFINED_BIN_IND) {
3036
+ return 0;
3037
+ }
3038
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3039
+ MARK_SET_BIN_DELETED(tab, bin_ind);
3040
+ }
3041
+ entry = &tab->entries[bin];
3042
+ *key = entry->key;
3043
+ MARK_ENTRY_DELETED(entry);
3044
+ tab->num_entries--;
3045
+ set_update_range_for_deleted(tab, bin);
3046
+ return 1;
3047
+ }
3048
+
3049
+ /* Traverse all entries in table TAB calling FUNC with current entry
3050
+ key and zero. If the call returns ST_STOP, stop
3051
+ traversing. If the call returns ST_DELETE, delete the current
3052
+ entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
3053
+ traversing. The function returns zero unless an error is found.
3054
+ CHECK_P is flag of set_foreach_check call. The behavior is a bit
3055
+ different for ST_CHECK and when the current element is removed
3056
+ during traversing. */
3057
+ static inline int
3058
+ set_general_foreach(set_table *tab, set_foreach_check_callback_func *func,
3059
+ set_update_callback_func *replace, st_data_t arg,
3060
+ int check_p)
3061
+ {
3062
+ st_index_t bin;
3063
+ st_index_t bin_ind;
3064
+ set_table_entry *entries, *curr_entry_ptr;
3065
+ enum st_retval retval;
3066
+ st_index_t i, rebuilds_num;
3067
+ st_hash_t hash;
3068
+ st_data_t key;
3069
+ int error_p, packed_p = !set_has_bins(tab);
3070
+
3071
+ entries = tab->entries;
3072
+ /* The bound can change inside the loop even without rebuilding
3073
+ the table, e.g. by an entry insertion. */
3074
+ for (i = tab->entries_start; i < tab->entries_bound; i++) {
3075
+ curr_entry_ptr = &entries[i];
3076
+ if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
3077
+ continue;
3078
+ key = curr_entry_ptr->key;
3079
+ rebuilds_num = tab->rebuilds_num;
3080
+ hash = curr_entry_ptr->hash;
3081
+ retval = (*func)(key, arg, 0);
3082
+
3083
+ if (retval == ST_REPLACE && replace) {
3084
+ retval = (*replace)(&key, arg, TRUE);
3085
+ curr_entry_ptr->key = key;
3086
+ }
3087
+
3088
+ if (rebuilds_num != tab->rebuilds_num) {
3089
+ retry:
3090
+ entries = tab->entries;
3091
+ packed_p = !set_has_bins(tab);
3092
+ if (packed_p) {
3093
+ i = set_find_entry(tab, hash, key);
3094
+ if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3095
+ goto retry;
3096
+ error_p = i == UNDEFINED_ENTRY_IND;
3097
+ }
3098
+ else {
3099
+ i = set_find_table_entry_ind(tab, hash, key);
3100
+ if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
3101
+ goto retry;
3102
+ error_p = i == UNDEFINED_ENTRY_IND;
3103
+ i -= ENTRY_BASE;
3104
+ }
3105
+ if (error_p && check_p) {
3106
+ /* call func with error notice */
3107
+ retval = (*func)(0, arg, 1);
3108
+ return 1;
3109
+ }
3110
+ curr_entry_ptr = &entries[i];
3111
+ }
3112
+ switch (retval) {
3113
+ case ST_REPLACE:
3114
+ break;
3115
+ case ST_CONTINUE:
3116
+ break;
3117
+ case ST_CHECK:
3118
+ if (check_p)
3119
+ break;
3120
+ case ST_STOP:
3121
+ return 0;
3122
+ case ST_DELETE: {
3123
+ st_data_t key = curr_entry_ptr->key;
3124
+
3125
+ again:
3126
+ if (packed_p) {
3127
+ bin = set_find_entry(tab, hash, key);
3128
+ if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
3129
+ goto again;
3130
+ if (bin == UNDEFINED_ENTRY_IND)
3131
+ break;
3132
+ }
3133
+ else {
3134
+ bin_ind = set_find_table_bin_ind(tab, hash, key);
3135
+ if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
3136
+ goto again;
3137
+ if (bin_ind == UNDEFINED_BIN_IND)
3138
+ break;
3139
+ bin = get_bin(set_bins_ptr(tab), set_get_size_ind(tab), bin_ind) - ENTRY_BASE;
3140
+ MARK_SET_BIN_DELETED(tab, bin_ind);
3141
+ }
3142
+ curr_entry_ptr = &entries[bin];
3143
+ MARK_ENTRY_DELETED(curr_entry_ptr);
3144
+ tab->num_entries--;
3145
+ set_update_range_for_deleted(tab, bin);
3146
+ break;
3147
+ }
3148
+ }
3149
+ }
3150
+ return 0;
3151
+ }
3152
+
3153
+ int
3154
+ set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg)
3155
+ {
3156
+ return set_general_foreach(tab, func, replace, arg, TRUE);
3157
+ }
3158
+
3159
+ struct set_functor {
3160
+ set_foreach_callback_func *func;
3161
+ st_data_t arg;
3162
+ };
3163
+
3164
+ static int
3165
+ set_apply_functor(st_data_t k, st_data_t d, int _)
3166
+ {
3167
+ const struct set_functor *f = (void *)d;
3168
+ return f->func(k, f->arg);
3169
+ }
3170
+
3171
+ int
3172
+ set_table_foreach(set_table *tab, set_foreach_callback_func *func, st_data_t arg)
3173
+ {
3174
+ const struct set_functor f = { func, arg };
3175
+ return set_general_foreach(tab, set_apply_functor, NULL, (st_data_t)&f, FALSE);
3176
+ }
3177
+
3178
+ /* See comments for function set_delete_safe. */
3179
+ int
3180
+ set_foreach_check(set_table *tab, set_foreach_check_callback_func *func, st_data_t arg,
3181
+ st_data_t never ATTRIBUTE_UNUSED)
3182
+ {
3183
+ return set_general_foreach(tab, func, NULL, arg, TRUE);
3184
+ }
3185
+
3186
+ /* Set up array KEYS by at most SIZE keys of head table TAB entries.
3187
+ Return the number of keys set up in array KEYS. */
3188
+ inline st_index_t
3189
+ set_keys(set_table *tab, st_data_t *keys, st_index_t size)
3190
+ {
3191
+ st_index_t i, bound;
3192
+ st_data_t key, *keys_start, *keys_end;
3193
+ set_table_entry *curr_entry_ptr, *entries = tab->entries;
3194
+
3195
+ bound = tab->entries_bound;
3196
+ keys_start = keys;
3197
+ keys_end = keys + size;
3198
+ for (i = tab->entries_start; i < bound; i++) {
3199
+ if (keys == keys_end)
3200
+ break;
3201
+ curr_entry_ptr = &entries[i];
3202
+ key = curr_entry_ptr->key;
3203
+ if (! DELETED_ENTRY_P(curr_entry_ptr))
3204
+ *keys++ = key;
3205
+ }
3206
+
3207
+ return keys - keys_start;
3208
+ }
3209
+
3210
+ void
3211
+ set_compact_table(set_table *tab)
3212
+ {
3213
+ st_index_t num = tab->num_entries;
3214
+ if (REBUILD_THRESHOLD * num <= set_get_allocated_entries(tab)) {
3215
+ /* Compaction: */
3216
+ set_table *new_tab = set_init_table_with_size(NULL, tab->type, 2 * num);
3217
+ set_rebuild_table_with(new_tab, tab);
3218
+ set_rebuild_move_table(new_tab, tab);
3219
+ set_rebuild_cleanup(tab);
3220
+ }
3221
+ }
3222
+
2339
3223
  #endif