multi_string_replace 0.1.0 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.gitignore +1 -0
 - data/Gemfile.lock +4 -14
 - data/LICENSE.txt +1 -1
 - data/README.md +40 -3
 - data/bin/benchmark.rb +4 -3
 - data/ext/multi_string_replace/aho_trie.c +58 -73
 - data/ext/multi_string_replace/aho_trie.h +3 -0
 - data/ext/multi_string_replace/ahocorasick.c +28 -4
 - data/ext/multi_string_replace/ahocorasick.h +15 -1
 - data/ext/multi_string_replace/multi_string_replace.c +37 -9
 - data/lib/multi_string_replace.rb +11 -1
 - data/lib/multi_string_replace/version.rb +1 -1
 - data/multi_string_replace.gemspec +4 -5
 - data/replaced.txt +651 -0
 - data/replaced2.txt +651 -0
 - metadata +10 -20
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: '03680f4522d98a7a162df95c1151feedabe821febc15a03c7a98a6996dd43cc1'
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 80222e5675b3310fd079e6099026f8fa584d77542a0a34a30a3c6c3c67643cdb
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 6bcda8e829e5fd9e747c567a44c09363f304bbc8162d661418fb491f6626019c158d8a60345bb814cab42598c3830d884e6b8b70b20215d9614cb36d6913928e
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 3f2895d64ba1d7560e866104f499a592767fa1dd161c1ff3d975126091d8330f06e85e25f37182de46f05c5dcb51a3f8ed4e38b352b78c8a1955dde1fdca5396
         
     | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/Gemfile.lock
    CHANGED
    
    | 
         @@ -1,22 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            PATH
         
     | 
| 
       2 
2 
     | 
    
         
             
              remote: .
         
     | 
| 
       3 
3 
     | 
    
         
             
              specs:
         
     | 
| 
       4 
     | 
    
         
            -
                multi_string_replace ( 
     | 
| 
      
 4 
     | 
    
         
            +
                multi_string_replace (1.0.4)
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
            GEM
         
     | 
| 
       7 
7 
     | 
    
         
             
              remote: https://rubygems.org/
         
     | 
| 
       8 
8 
     | 
    
         
             
              specs:
         
     | 
| 
       9 
     | 
    
         
            -
                byebug (10.0.2)
         
     | 
| 
       10 
     | 
    
         
            -
                coderay (1.1.2)
         
     | 
| 
       11 
9 
     | 
    
         
             
                diff-lcs (1.3)
         
     | 
| 
       12 
     | 
    
         
            -
                 
     | 
| 
       13 
     | 
    
         
            -
                pry (0.11.3)
         
     | 
| 
       14 
     | 
    
         
            -
                  coderay (~> 1.1.0)
         
     | 
| 
       15 
     | 
    
         
            -
                  method_source (~> 0.9.0)
         
     | 
| 
       16 
     | 
    
         
            -
                pry-byebug (3.6.0)
         
     | 
| 
       17 
     | 
    
         
            -
                  byebug (~> 10.0)
         
     | 
| 
       18 
     | 
    
         
            -
                  pry (~> 0.10)
         
     | 
| 
       19 
     | 
    
         
            -
                rake (10.5.0)
         
     | 
| 
      
 10 
     | 
    
         
            +
                rake (13.0.1)
         
     | 
| 
       20 
11 
     | 
    
         
             
                rake-compiler (1.0.5)
         
     | 
| 
       21 
12 
     | 
    
         
             
                  rake
         
     | 
| 
       22 
13 
     | 
    
         
             
                rspec (3.8.0)
         
     | 
| 
         @@ -39,10 +30,9 @@ PLATFORMS 
     | 
|
| 
       39 
30 
     | 
    
         
             
            DEPENDENCIES
         
     | 
| 
       40 
31 
     | 
    
         
             
              bundler (~> 1.16)
         
     | 
| 
       41 
32 
     | 
    
         
             
              multi_string_replace!
         
     | 
| 
       42 
     | 
    
         
            -
               
     | 
| 
       43 
     | 
    
         
            -
              rake (~> 10.0)
         
     | 
| 
      
 33 
     | 
    
         
            +
              rake
         
     | 
| 
       44 
34 
     | 
    
         
             
              rake-compiler
         
     | 
| 
       45 
35 
     | 
    
         
             
              rspec (~> 3.0)
         
     | 
| 
       46 
36 
     | 
    
         | 
| 
       47 
37 
     | 
    
         
             
            BUNDLED WITH
         
     | 
| 
       48 
     | 
    
         
            -
               1.16. 
     | 
| 
      
 38 
     | 
    
         
            +
               1.16.5
         
     | 
    
        data/LICENSE.txt
    CHANGED
    
    | 
         @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
     | 
|
| 
       18 
18 
     | 
    
         
             
            AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         
     | 
| 
       19 
19 
     | 
    
         
             
            LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         
     | 
| 
       20 
20 
     | 
    
         
             
            OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
         
     | 
| 
       21 
     | 
    
         
            -
            THE SOFTWARE.
         
     | 
| 
      
 21 
     | 
    
         
            +
            THE SOFTWARE.
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -1,8 +1,15 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
             
     | 
| 
      
 2 
     | 
    
         
            +
            [](https://rubygems.org/gems/multi_string_replace)
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
       1 
5 
     | 
    
         
             
            # MultiStringReplace
         
     | 
| 
       2 
6 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
      
 7 
     | 
    
         
            +
            A fast multiple-string replacement library for Ruby. Uses a C implementation of the Aho–Corasick algorithm based
         
     | 
| 
      
 8 
     | 
    
         
            +
            on https://github.com/morenice/ahocorasick while adding support for a few performance enhancements and on the
         
     | 
| 
      
 9 
     | 
    
         
            +
            fly multiple string replacement.
         
     | 
| 
       4 
10 
     | 
    
         | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
      
 11 
     | 
    
         
            +
            If regular expressions are not needed, this library offers significant performance advantages over String#gsub for large strings
         
     | 
| 
      
 12 
     | 
    
         
            +
            and with a large number of tokens.
         
     | 
| 
       6 
13 
     | 
    
         | 
| 
       7 
14 
     | 
    
         
             
            ## Installation
         
     | 
| 
       8 
15 
     | 
    
         | 
| 
         @@ -22,7 +29,37 @@ Or install it yourself as: 
     | 
|
| 
       22 
29 
     | 
    
         | 
| 
       23 
30 
     | 
    
         
             
            ## Usage
         
     | 
| 
       24 
31 
     | 
    
         | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
      
 32 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 33 
     | 
    
         
            +
            MultiStringReplace.match("The quick brown fox jumps over the lazy dog brown", ['brown', 'fox'])
         
     | 
| 
      
 34 
     | 
    
         
            +
            # { 0 => [10, 44], 1 => [16] }
         
     | 
| 
      
 35 
     | 
    
         
            +
            MultiStringReplace.replace("The quick brown fox jumps over the lazy dog brown", {'brown' => 'black', 'fox' => 'wolf'})
         
     | 
| 
      
 36 
     | 
    
         
            +
            # The quick black wolf jumps over the lazy dog black
         
     | 
| 
      
 37 
     | 
    
         
            +
            ```
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            You can also pass in a Proc as a replacement value; it is only evaluated when its token is encountered.
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 42 
     | 
    
         
            +
            MultiStringReplace.replace("The quick brown fox jumps over the lazy dog brown", {'brown' => 'black', 'fox' => ->() { "cat" }})
         
     | 
| 
      
 43 
     | 
    
         
            +
            ```
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
            Also adds a mreplace method to String which does the same thing:
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 48 
     | 
    
         
            +
            "The quick brown fox jumps over the lazy dog brown".mreplace({'brown' => 'black', 'fox' => ->() { "cat" }})
         
     | 
| 
      
 49 
     | 
    
         
            +
            ```
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
            ## Performance
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
            Performing token replacement on a 200K text file repeated 100 times
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
            ```
         
     | 
| 
      
 56 
     | 
    
         
            +
                                     user     system      total        real
         
     | 
| 
      
 57 
     | 
    
         
            +
            multi gsub           1.322510   0.000000   1.322510 (  1.344405)
         
     | 
| 
      
 58 
     | 
    
         
            +
            MultiStringReplace   0.196823   0.007979   0.204802 (  0.207219)
         
     | 
| 
      
 59 
     | 
    
         
            +
            mreplace             0.200593   0.004031   0.204624 (  0.205379)
         
     | 
| 
      
 60 
     | 
    
         
            +
            ```
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
            Benchmark sources can be found here: <https://github.com/jedld/multi_word_replace/blob/master/bin/benchmark.rb>
         
     | 
| 
       26 
63 
     | 
    
         | 
| 
       27 
64 
     | 
    
         
             
            ## Development
         
     | 
| 
       28 
65 
     | 
    
         | 
    
        data/bin/benchmark.rb
    CHANGED
    
    | 
         @@ -1,7 +1,6 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require "bundler/setup"
         
     | 
| 
       2 
2 
     | 
    
         
             
            require 'multi_string_replace'
         
     | 
| 
       3 
3 
     | 
    
         
             
            require 'benchmark'
         
     | 
| 
       4 
     | 
    
         
            -
            require 'pry-byebug'
         
     | 
| 
       5 
4 
     | 
    
         | 
| 
       6 
5 
     | 
    
         
             
            class String
         
     | 
| 
       7 
6 
     | 
    
         
             
              def mgsub(key_value_pairs=[].freeze)
         
     | 
| 
         @@ -27,12 +26,14 @@ replace = { 
     | 
|
| 
       27 
26 
     | 
    
         
             
              'Cras' => 'uuuuuuuu',
         
     | 
| 
       28 
27 
     | 
    
         
             
              'nunc' => 'eeeeeee',
         
     | 
| 
       29 
28 
     | 
    
         
             
              'cursus' => '乧乨乩乪乫乬乭乮乯买乱乲乳乴乵乶乷乸乹乺乻乼乽乾乿',
         
     | 
| 
      
 29 
     | 
    
         
            +
              'Vivamus' => '㐀㐁㐂㐃㐄㐅㐆㐇㐈㐉㐊㐋'
         
     | 
| 
       30 
30 
     | 
    
         
             
            }
         
     | 
| 
       31 
31 
     | 
    
         | 
| 
       32 
32 
     | 
    
         
             
            File.write('replaced.txt', body.gsub(/(#{replace.keys.join('|')})/, replace))
         
     | 
| 
       33 
33 
     | 
    
         
             
            File.write('replaced2.txt', MultiStringReplace.replace(body, replace))
         
     | 
| 
       34 
34 
     | 
    
         | 
| 
       35 
35 
     | 
    
         
             
            Benchmark.bmbm do |x|
         
     | 
| 
       36 
     | 
    
         
            -
              x.report "multi gsub" do body.mgsub(replace.map { |k, v| [/#{k}/, v] } ) end
         
     | 
| 
       37 
     | 
    
         
            -
              x.report "MultiStringReplace" do MultiStringReplace.replace(body, replace) end
         
     | 
| 
      
 36 
     | 
    
         
            +
              x.report "multi gsub" do 100.times { body.mgsub(replace.map { |k, v| [/#{k}/, v] } ) } end
         
     | 
| 
      
 37 
     | 
    
         
            +
              x.report "MultiStringReplace" do 100.times { MultiStringReplace.replace(body, replace) } end
         
     | 
| 
      
 38 
     | 
    
         
            +
              x.report "mreplace" do 100.times { body.mreplace(replace) } end
         
     | 
| 
       38 
39 
     | 
    
         
             
            end
         
     | 
| 
         @@ -29,60 +29,48 @@ bool aho_add_trie_node(struct aho_trie * restrict t, struct aho_text_t * restric 
     | 
|
| 
       29 
29 
     | 
    
         | 
| 
       30 
30 
     | 
    
         
             
                for (int text_idx = 0; text_idx < text->len; text_idx++)
         
     | 
| 
       31 
31 
     | 
    
         
             
                {
         
     | 
| 
       32 
     | 
    
         
            -
                    unsigned  
     | 
| 
      
 32 
     | 
    
         
            +
                    unsigned int node_text = text->text[text_idx];
         
     | 
| 
       33 
33 
     | 
    
         
             
                    bool find_node = false;
         
     | 
| 
       34 
34 
     | 
    
         
             
                    int child_idx = 0;
         
     | 
| 
       35 
35 
     | 
    
         | 
| 
       36 
36 
     | 
    
         
             
                    if (travasal_node->child_count == 0)
         
     | 
| 
       37 
37 
     | 
    
         
             
                    {
         
     | 
| 
       38 
38 
     | 
    
         
             
                        /* insert first node to child_list */
         
     | 
| 
       39 
     | 
    
         
            -
                         
     | 
| 
       40 
     | 
    
         
            -
                                     (struct aho_trie_node*) malloc(sizeof(struct aho_trie_node));
         
     | 
| 
       41 
     | 
    
         
            -
                        travasal_node->child_count++;
         
     | 
| 
      
 39 
     | 
    
         
            +
                        struct aho_trie_node* child = (struct aho_trie_node*) malloc(sizeof(struct aho_trie_node));
         
     | 
| 
       42 
40 
     | 
    
         | 
| 
       43 
     | 
    
         
            -
                         
     | 
| 
       44 
     | 
    
         
            -
                        travasal_node-> 
     | 
| 
       45 
     | 
    
         
            -
                        travasal_node-> 
     | 
| 
      
 41 
     | 
    
         
            +
                        travasal_node->child_list[node_text] = child;
         
     | 
| 
      
 42 
     | 
    
         
            +
                        travasal_node->first_child = child;
         
     | 
| 
      
 43 
     | 
    
         
            +
                        travasal_node->last_child = child;
         
     | 
| 
      
 44 
     | 
    
         
            +
                        travasal_node->child_count++;
         
     | 
| 
       46 
45 
     | 
    
         | 
| 
       47 
     | 
    
         
            -
                         
     | 
| 
      
 46 
     | 
    
         
            +
                        __aho_trie_node_init(child);
         
     | 
| 
      
 47 
     | 
    
         
            +
                        child->text = node_text;
         
     | 
| 
      
 48 
     | 
    
         
            +
                        child->parent = travasal_node;
         
     | 
| 
      
 49 
     | 
    
         
            +
                        child->failure_link = &(t->root);
         
     | 
| 
      
 50 
     | 
    
         
            +
                        travasal_node = child;
         
     | 
| 
       48 
51 
     | 
    
         
             
                        continue;
         
     | 
| 
       49 
52 
     | 
    
         
             
                    }
         
     | 
| 
       50 
53 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
                    if (travasal_node-> 
     | 
| 
       52 
     | 
    
         
            -
                    {
         
     | 
| 
       53 
     | 
    
         
            -
                        return false;
         
     | 
| 
       54 
     | 
    
         
            -
                    }
         
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
                    for (child_idx=0; child_idx < travasal_node->child_count; child_idx++)
         
     | 
| 
       57 
     | 
    
         
            -
                    {
         
     | 
| 
       58 
     | 
    
         
            -
                        if (travasal_node->child_list[child_idx]->text == node_text )
         
     | 
| 
       59 
     | 
    
         
            -
                        {
         
     | 
| 
       60 
     | 
    
         
            -
                            find_node = true;
         
     | 
| 
       61 
     | 
    
         
            -
                            break;
         
     | 
| 
       62 
     | 
    
         
            -
                        }
         
     | 
| 
       63 
     | 
    
         
            -
                    }
         
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
                    if (find_node == true)
         
     | 
| 
      
 54 
     | 
    
         
            +
                    if ( travasal_node->child_list[node_text] != NULL)
         
     | 
| 
       66 
55 
     | 
    
         
             
                    {
         
     | 
| 
       67 
     | 
    
         
            -
                        travasal_node->child_list[ 
     | 
| 
       68 
     | 
    
         
            -
                        travasal_node = travasal_node->child_list[ 
     | 
| 
      
 56 
     | 
    
         
            +
                        travasal_node->child_list[node_text]->ref_count++;
         
     | 
| 
      
 57 
     | 
    
         
            +
                        travasal_node = travasal_node->child_list[node_text];
         
     | 
| 
       69 
58 
     | 
    
         
             
                    }
         
     | 
| 
       70 
59 
     | 
    
         
             
                    else
         
     | 
| 
       71 
60 
     | 
    
         
             
                    {
         
     | 
| 
       72 
61 
     | 
    
         
             
                        /* push_back to child_list */
         
     | 
| 
       73 
     | 
    
         
            -
                        struct aho_trie_node*  
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
                        travasal_node->child_list[travasal_node->child_count] =
         
     | 
| 
       76 
     | 
    
         
            -
                                     (struct aho_trie_node*) malloc(sizeof(struct aho_trie_node));
         
     | 
| 
      
 62 
     | 
    
         
            +
                        struct aho_trie_node* child =  (struct aho_trie_node*) malloc(sizeof(struct aho_trie_node));
         
     | 
| 
       77 
63 
     | 
    
         | 
| 
       78 
     | 
    
         
            -
                         
     | 
| 
      
 64 
     | 
    
         
            +
                        travasal_node->child_list[node_text] = child;
         
     | 
| 
       79 
65 
     | 
    
         
             
                        travasal_node->child_count++;
         
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
                         
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
                         
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
                         
     | 
| 
      
 66 
     | 
    
         
            +
                        travasal_node->last_child->next = child;
         
     | 
| 
      
 67 
     | 
    
         
            +
                        travasal_node->last_child = child;
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
                        __aho_trie_node_init(child);
         
     | 
| 
      
 70 
     | 
    
         
            +
                        child->text = node_text;
         
     | 
| 
      
 71 
     | 
    
         
            +
                        child->parent = travasal_node;
         
     | 
| 
      
 72 
     | 
    
         
            +
                        child->failure_link = &(t->root);
         
     | 
| 
      
 73 
     | 
    
         
            +
                        travasal_node = child;
         
     | 
| 
       86 
74 
     | 
    
         
             
                    }
         
     | 
| 
       87 
75 
     | 
    
         
             
                }
         
     | 
| 
       88 
76 
     | 
    
         | 
| 
         @@ -99,7 +87,6 @@ bool __aho_connect_link(struct aho_trie_node* p, struct aho_trie_node* q) 
     | 
|
| 
       99 
87 
     | 
    
         
             
            {
         
     | 
| 
       100 
88 
     | 
    
         
             
                struct aho_trie_node *pf = NULL;
         
     | 
| 
       101 
89 
     | 
    
         
             
                int i = 0;
         
     | 
| 
       102 
     | 
    
         
            -
             
     | 
| 
       103 
90 
     | 
    
         
             
                /* is root node */
         
     | 
| 
       104 
91 
     | 
    
         
             
                if (p->parent == NULL)
         
     | 
| 
       105 
92 
     | 
    
         
             
                {
         
     | 
| 
         @@ -108,26 +95,26 @@ bool __aho_connect_link(struct aho_trie_node* p, struct aho_trie_node* q) 
     | 
|
| 
       108 
95 
     | 
    
         
             
                }
         
     | 
| 
       109 
96 
     | 
    
         | 
| 
       110 
97 
     | 
    
         
             
                pf = p->failure_link;
         
     | 
| 
       111 
     | 
    
         
            -
             
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
                /* check child node of failure link(p) */
         
     | 
| 
      
 100 
     | 
    
         
            +
                if (pf->child_list[q->text] != NULL)
         
     | 
| 
       112 
101 
     | 
    
         
             
                {
         
     | 
| 
       113 
     | 
    
         
            -
                     
     | 
| 
       114 
     | 
    
         
            -
                     
     | 
| 
       115 
     | 
    
         
            -
                     
     | 
| 
       116 
     | 
    
         
            -
                        /* connect failure link */
         
     | 
| 
       117 
     | 
    
         
            -
                        q->failure_link = pf->child_list[i];
         
     | 
| 
      
 102 
     | 
    
         
            +
                    struct aho_trie_node *node = pf->child_list[q->text];
         
     | 
| 
      
 103 
     | 
    
         
            +
                    /* connect failure link */
         
     | 
| 
      
 104 
     | 
    
         
            +
                    q->failure_link =node;
         
     | 
| 
       118 
105 
     | 
    
         | 
| 
       119 
     | 
    
         
            -
             
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
       121 
     | 
    
         
            -
             
     | 
| 
       122 
     | 
    
         
            -
             
     | 
| 
       123 
     | 
    
         
            -
                        }
         
     | 
| 
       124 
     | 
    
         
            -
                        else
         
     | 
| 
       125 
     | 
    
         
            -
                        {
         
     | 
| 
       126 
     | 
    
         
            -
                            q->output_link = pf->child_list[i]->output_link;
         
     | 
| 
       127 
     | 
    
         
            -
                        }
         
     | 
| 
       128 
     | 
    
         
            -
                        return true;
         
     | 
| 
      
 106 
     | 
    
         
            +
                    /* connect output link */
         
     | 
| 
      
 107 
     | 
    
         
            +
                    if (node->text_end)
         
     | 
| 
      
 108 
     | 
    
         
            +
                    {
         
     | 
| 
      
 109 
     | 
    
         
            +
                        q->output_link = node;
         
     | 
| 
       129 
110 
     | 
    
         
             
                    }
         
     | 
| 
      
 111 
     | 
    
         
            +
                    else
         
     | 
| 
      
 112 
     | 
    
         
            +
                    {
         
     | 
| 
      
 113 
     | 
    
         
            +
                        q->output_link = node->output_link;
         
     | 
| 
      
 114 
     | 
    
         
            +
                    }
         
     | 
| 
      
 115 
     | 
    
         
            +
                    return true;
         
     | 
| 
       130 
116 
     | 
    
         
             
                }
         
     | 
| 
      
 117 
     | 
    
         
            +
             
     | 
| 
       131 
118 
     | 
    
         
             
                return false;
         
     | 
| 
       132 
119 
     | 
    
         
             
            }
         
     | 
| 
       133 
120 
     | 
    
         | 
| 
         @@ -158,20 +145,20 @@ void aho_connect_link(struct aho_trie * restrict t) 
     | 
|
| 
       158 
145 
     | 
    
         
             
                    free(queue_node);
         
     | 
| 
       159 
146 
     | 
    
         | 
| 
       160 
147 
     | 
    
         
             
                    /* get child node list of p */
         
     | 
| 
       161 
     | 
    
         
            -
                     
     | 
| 
      
 148 
     | 
    
         
            +
                    struct aho_trie_node *child_ptr = p->first_child;
         
     | 
| 
      
 149 
     | 
    
         
            +
                    while (child_ptr != NULL)
         
     | 
| 
       162 
150 
     | 
    
         
             
                    {
         
     | 
| 
       163 
151 
     | 
    
         
             
                        struct aho_trie_node *pf = p;
         
     | 
| 
       164 
     | 
    
         
            -
             
     | 
| 
       165 
     | 
    
         
            -
                         
     | 
| 
       166 
     | 
    
         
            -
                        q = p->child_list[i];
         
     | 
| 
      
 152 
     | 
    
         
            +
                        aho_queue_enqueue(&queue, child_ptr);
         
     | 
| 
      
 153 
     | 
    
         
            +
                        q = child_ptr;
         
     | 
| 
       167 
154 
     | 
    
         | 
| 
       168 
155 
     | 
    
         
             
                        while (__aho_connect_link(pf, q) == false)
         
     | 
| 
       169 
156 
     | 
    
         
             
                        {
         
     | 
| 
       170 
157 
     | 
    
         
             
                            pf = pf->failure_link;
         
     | 
| 
       171 
158 
     | 
    
         
             
                        }
         
     | 
| 
      
 159 
     | 
    
         
            +
                        child_ptr = child_ptr->next;
         
     | 
| 
       172 
160 
     | 
    
         
             
                    }
         
     | 
| 
       173 
161 
     | 
    
         
             
                }
         
     | 
| 
       174 
     | 
    
         
            -
             
     | 
| 
       175 
162 
     | 
    
         
             
                aho_queue_destroy(&queue);
         
     | 
| 
       176 
163 
     | 
    
         
             
            }
         
     | 
| 
       177 
164 
     | 
    
         | 
| 
         @@ -180,7 +167,6 @@ void aho_clean_trie_node(struct aho_trie * restrict t) 
     | 
|
| 
       180 
167 
     | 
    
         
             
                struct aho_queue queue;
         
     | 
| 
       181 
168 
     | 
    
         
             
                aho_queue_init(&queue);
         
     | 
| 
       182 
169 
     | 
    
         
             
                aho_queue_enqueue(&queue, &(t->root));
         
     | 
| 
       183 
     | 
    
         
            -
             
     | 
| 
       184 
170 
     | 
    
         
             
                /* BFS */
         
     | 
| 
       185 
171 
     | 
    
         
             
                while (true)
         
     | 
| 
       186 
172 
     | 
    
         
             
                {
         
     | 
| 
         @@ -197,9 +183,11 @@ void aho_clean_trie_node(struct aho_trie * restrict t) 
     | 
|
| 
       197 
183 
     | 
    
         
             
                    remove_node = queue_node->data;
         
     | 
| 
       198 
184 
     | 
    
         
             
                    free(queue_node);
         
     | 
| 
       199 
185 
     | 
    
         | 
| 
       200 
     | 
    
         
            -
                     
     | 
| 
      
 186 
     | 
    
         
            +
                    struct aho_trie_node *child_ptr = remove_node->first_child;
         
     | 
| 
      
 187 
     | 
    
         
            +
                    while (child_ptr != NULL)
         
     | 
| 
       201 
188 
     | 
    
         
             
                    {
         
     | 
| 
       202 
     | 
    
         
            -
                        aho_queue_enqueue(&queue,  
     | 
| 
      
 189 
     | 
    
         
            +
                        aho_queue_enqueue(&queue, child_ptr);
         
     | 
| 
      
 190 
     | 
    
         
            +
                        child_ptr = child_ptr->next;
         
     | 
| 
       203 
191 
     | 
    
         
             
                    }
         
     | 
| 
       204 
192 
     | 
    
         | 
| 
       205 
193 
     | 
    
         
             
                    /* is root node */
         
     | 
| 
         @@ -218,14 +206,12 @@ bool __aho_find_trie_node(struct aho_trie_node** restrict start, const unsigned 
     | 
|
| 
       218 
206 
     | 
    
         
             
                int i = 0;
         
     | 
| 
       219 
207 
     | 
    
         | 
| 
       220 
208 
     | 
    
         
             
                search_node = *start;
         
     | 
| 
       221 
     | 
    
         
            -
             
     | 
| 
      
 209 
     | 
    
         
            +
             
     | 
| 
      
 210 
     | 
    
         
            +
                if (search_node->child_list[(unsigned int)text] != NULL)
         
     | 
| 
       222 
211 
     | 
    
         
             
                {
         
     | 
| 
       223 
     | 
    
         
            -
                     
     | 
| 
       224 
     | 
    
         
            -
                     
     | 
| 
       225 
     | 
    
         
            -
             
     | 
| 
       226 
     | 
    
         
            -
                        *start = search_node->child_list[i];
         
     | 
| 
       227 
     | 
    
         
            -
                        return true;
         
     | 
| 
       228 
     | 
    
         
            -
                    }
         
     | 
| 
      
 212 
     | 
    
         
            +
                    /* find it! move to find child node! */
         
     | 
| 
      
 213 
     | 
    
         
            +
                    *start = search_node->child_list[(unsigned int)text];
         
     | 
| 
      
 214 
     | 
    
         
            +
                    return true;
         
     | 
| 
       229 
215 
     | 
    
         
             
                }
         
     | 
| 
       230 
216 
     | 
    
         | 
| 
       231 
217 
     | 
    
         
             
                /* not found */
         
     | 
| 
         @@ -243,7 +229,6 @@ struct aho_text_t* aho_find_trie_node(struct aho_trie_node** restrict start, con 
     | 
|
| 
       243 
229 
     | 
    
         
             
                    {
         
     | 
| 
       244 
230 
     | 
    
         
             
                        return NULL;
         
     | 
| 
       245 
231 
     | 
    
         
             
                    }
         
     | 
| 
       246 
     | 
    
         
            -
             
     | 
| 
       247 
232 
     | 
    
         
             
                    /* retry find. move failure link. */
         
     | 
| 
       248 
233 
     | 
    
         
             
                    *start = (*start)->failure_link;
         
     | 
| 
       249 
234 
     | 
    
         
             
                }
         
     | 
| 
         @@ -260,7 +245,6 @@ struct aho_text_t* aho_find_trie_node(struct aho_trie_node** restrict start, con 
     | 
|
| 
       260 
245 
     | 
    
         
             
                {
         
     | 
| 
       261 
246 
     | 
    
         
             
                    return (*start)->output_link->output_text;
         
     | 
| 
       262 
247 
     | 
    
         
             
                }
         
     | 
| 
       263 
     | 
    
         
            -
             
     | 
| 
       264 
248 
     | 
    
         
             
                /* keep going */
         
     | 
| 
       265 
249 
     | 
    
         
             
                return NULL;
         
     | 
| 
       266 
250 
     | 
    
         
             
            }
         
     | 
| 
         @@ -286,10 +270,11 @@ void aho_print_trie(struct aho_trie * restrict t) 
     | 
|
| 
       286 
270 
     | 
    
         | 
| 
       287 
271 
     | 
    
         
             
                    travasal_node = queue_node->data;
         
     | 
| 
       288 
272 
     | 
    
         
             
                    free(queue_node);
         
     | 
| 
       289 
     | 
    
         
            -
             
     | 
| 
       290 
     | 
    
         
            -
                     
     | 
| 
      
 273 
     | 
    
         
            +
                    struct aho_trie_node *child_ptr = travasal_node->first_child;
         
     | 
| 
      
 274 
     | 
    
         
            +
                    while (child_ptr != NULL)
         
     | 
| 
       291 
275 
     | 
    
         
             
                    {
         
     | 
| 
       292 
     | 
    
         
            -
                        aho_queue_enqueue(&queue,  
     | 
| 
      
 276 
     | 
    
         
            +
                        aho_queue_enqueue(&queue, child_ptr);
         
     | 
| 
      
 277 
     | 
    
         
            +
                        child_ptr = child_ptr->next;
         
     | 
| 
       293 
278 
     | 
    
         
             
                    }
         
     | 
| 
       294 
279 
     | 
    
         | 
| 
       295 
280 
     | 
    
         
             
                    /* is root node */
         
     | 
| 
         @@ -11,6 +11,9 @@ struct aho_trie_node 
     | 
|
| 
       11 
11 
     | 
    
         | 
| 
       12 
12 
     | 
    
         
             
                struct aho_trie_node* parent;
         
     | 
| 
       13 
13 
     | 
    
         
             
                struct aho_trie_node* child_list[MAX_AHO_CHILD_NODE];
         
     | 
| 
      
 14 
     | 
    
         
            +
                struct aho_trie_node* first_child;
         
     | 
| 
      
 15 
     | 
    
         
            +
                struct aho_trie_node* last_child;
         
     | 
| 
      
 16 
     | 
    
         
            +
                struct aho_trie_node* next;
         
     | 
| 
       14 
17 
     | 
    
         
             
                unsigned int child_count;
         
     | 
| 
       15 
18 
     | 
    
         | 
| 
       16 
19 
     | 
    
         
             
                bool text_end;
         
     | 
| 
         @@ -1,3 +1,15 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
             
     | 
| 
      
 2 
     | 
    
         
            +
            // MIT License
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            // Copyright (c) 2017 morenice
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            // Permission is hereby granted, free of charge, to any person obtaining a copy
         
     | 
| 
      
 7 
     | 
    
         
            +
            // of this software and associated documentation files (the "Software"), to deal
         
     | 
| 
      
 8 
     | 
    
         
            +
            // in the Software without restriction, including without limitation the rights
         
     | 
| 
      
 9 
     | 
    
         
            +
            // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         
     | 
| 
      
 10 
     | 
    
         
            +
            // copies of the Software, and to permit persons to whom the Software is
         
     | 
| 
      
 11 
     | 
    
         
            +
            // furnished to do so, subject to the following conditions:
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
       1 
13 
     | 
    
         
             
            #include <limits.h>
         
     | 
| 
       2 
14 
     | 
    
         
             
            #include <string.h>
         
     | 
| 
       3 
15 
     | 
    
         
             
            #include <stdlib.h>
         
     | 
| 
         @@ -20,6 +32,7 @@ void aho_destroy(struct ahocorasick * restrict aho) 
     | 
|
| 
       20 
32 
     | 
    
         
             
            int aho_add_match_text(struct ahocorasick * restrict aho, const char* text, unsigned int len)
         
     | 
| 
       21 
33 
     | 
    
         
             
            {
         
     | 
| 
       22 
34 
     | 
    
         
             
                struct aho_text_t* a_text = NULL;
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
       23 
36 
     | 
    
         
             
                if (aho->accumulate_text_id == AHO_MAX_TEXT_ID)
         
     | 
| 
       24 
37 
     | 
    
         
             
                {
         
     | 
| 
       25 
38 
     | 
    
         
             
                    return -1;
         
     | 
| 
         @@ -30,11 +43,14 @@ int aho_add_match_text(struct ahocorasick * restrict aho, const char* text, unsi 
     | 
|
| 
       30 
43 
     | 
    
         
             
                    goto lack_free_mem;
         
     | 
| 
       31 
44 
     | 
    
         | 
| 
       32 
45 
     | 
    
         
             
                a_text->text = (char*) malloc(sizeof(char)*len);
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
       33 
47 
     | 
    
         
             
                if (!a_text->text)
         
     | 
| 
       34 
48 
     | 
    
         
             
                    goto lack_free_mem;
         
     | 
| 
       35 
49 
     | 
    
         | 
| 
       36 
50 
     | 
    
         
             
                a_text->id = aho->accumulate_text_id++;
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
       37 
52 
     | 
    
         
             
                memcpy(a_text->text, text, len);
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
       38 
54 
     | 
    
         
             
                a_text->len = len;
         
     | 
| 
       39 
55 
     | 
    
         
             
                a_text->prev = NULL;
         
     | 
| 
       40 
56 
     | 
    
         
             
                a_text->next = NULL;
         
     | 
| 
         @@ -54,6 +70,7 @@ int aho_add_match_text(struct ahocorasick * restrict aho, const char* text, unsi 
     | 
|
| 
       54 
70 
     | 
    
         
             
                return a_text->id;
         
     | 
| 
       55 
71 
     | 
    
         | 
| 
       56 
72 
     | 
    
         
             
            lack_free_mem:
         
     | 
| 
      
 73 
     | 
    
         
            +
                
         
     | 
| 
       57 
74 
     | 
    
         
             
                return -1;
         
     | 
| 
       58 
75 
     | 
    
         
             
            }
         
     | 
| 
       59 
76 
     | 
    
         | 
| 
         @@ -110,7 +127,9 @@ void aho_create_trie(struct ahocorasick * restrict aho) 
     | 
|
| 
       110 
127 
     | 
    
         | 
| 
       111 
128 
     | 
    
         
             
                for (iter = aho->text_list_head; iter != NULL; iter = iter->next)
         
     | 
| 
       112 
129 
     | 
    
         
             
                {
         
     | 
| 
      
 130 
     | 
    
         
            +
             
     | 
| 
       113 
131 
     | 
    
         
             
                    aho_add_trie_node(&(aho->trie), iter);
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
       114 
133 
     | 
    
         
             
                }
         
     | 
| 
       115 
134 
     | 
    
         | 
| 
       116 
135 
     | 
    
         
             
                aho_connect_link(&(aho->trie));
         
     | 
| 
         @@ -162,7 +181,8 @@ unsigned int aho_findtext(struct ahocorasick * restrict aho, const char* data, u 
     | 
|
| 
       162 
181 
     | 
    
         
             
                return match_count;
         
     | 
| 
       163 
182 
     | 
    
         
             
            }
         
     | 
| 
       164 
183 
     | 
    
         | 
| 
       165 
     | 
    
         
            -
            VALUE aho_replace_text(struct ahocorasick * restrict aho, const char* data, 
     | 
| 
      
 184 
     | 
    
         
            +
            VALUE aho_replace_text(struct ahocorasick * restrict aho, const char* data,
         
     | 
| 
      
 185 
     | 
    
         
            +
                unsigned long long data_len, char *values[], long value_sizes[], VALUE ruby_values[])
         
     | 
| 
       166 
186 
     | 
    
         
             
            {
         
     | 
| 
       167 
187 
     | 
    
         
             
                int i = 0;
         
     | 
| 
       168 
188 
     | 
    
         
             
                int match_count = 0;
         
     | 
| 
         @@ -191,14 +211,18 @@ VALUE aho_replace_text(struct ahocorasick * restrict aho, const char* data, unsi 
     | 
|
| 
       191 
211 
     | 
    
         
             
                    }
         
     | 
| 
       192 
212 
     | 
    
         | 
| 
       193 
213 
     | 
    
         
             
                    // concatenate from last_concat_pos
         
     | 
| 
       194 
     | 
    
         
            -
                     
     | 
| 
      
 214 
     | 
    
         
            +
                    if (pos > last_concat_pos) {
         
     | 
| 
      
 215 
     | 
    
         
            +
                        rb_str_cat(main_result, &data[last_concat_pos], pos - last_concat_pos);
         
     | 
| 
      
 216 
     | 
    
         
            +
                    }
         
     | 
| 
      
 217 
     | 
    
         
            +
             
     | 
| 
       195 
218 
     | 
    
         
             
                    // concatenate replace
         
     | 
| 
       196 
219 
     | 
    
         
             
                    if (values[result->id] == NULL) {
         
     | 
| 
       197 
220 
     | 
    
         
             
                        VALUE proc_result = rb_funcall(ruby_values[result->id], rb_intern("call"), 0);
         
     | 
| 
       198 
     | 
    
         
            -
                         
     | 
| 
      
 221 
     | 
    
         
            +
                        value_sizes[result->id] = RSTRING_LEN(proc_result);
         
     | 
| 
      
 222 
     | 
    
         
            +
                        values[result->id] = StringValuePtr(proc_result);
         
     | 
| 
       199 
223 
     | 
    
         
             
                    }
         
     | 
| 
       200 
224 
     | 
    
         | 
| 
       201 
     | 
    
         
            -
                     
     | 
| 
      
 225 
     | 
    
         
            +
                    rb_str_cat(main_result, values[result->id], value_sizes[result->id]); 
         
     | 
| 
       202 
226 
     | 
    
         
             
                    last_concat_pos = i + 1;
         
     | 
| 
       203 
227 
     | 
    
         
             
                }
         
     | 
| 
       204 
228 
     | 
    
         |