prefix-machine 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +57 -0
- data/.vscode/c_cpp_properties.json +18 -0
- data/Gemfile +7 -0
- data/LICENSE +29 -0
- data/README.md +15 -0
- data/Rakefile +16 -0
- data/ext/prefix_machine/extconf.rb +6 -0
- data/ext/prefix_machine/prefix_machine.c +136 -0
- data/ext/prefix_machine/prefix_machine.h +41 -0
- data/lib/prefix-machine.rb +24 -0
- data/lib/prefix-machine/version.rb +3 -0
- data/prefix-machine.gemspec +27 -0
- metadata +72 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4ba301f0538645c4e2dac72ed28b63f98444f6e199be9c4cd5ff6c18ad8aa427
|
4
|
+
data.tar.gz: 81fc79eb245505fe29305568dbc2d9b15cde2d22b6619919aca903723805e00a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 85fb7380ae8915832d6c70198385ecfaa498adbc56bb02125689545ee2dcb4f8ae493f15e796a7d5a58ff1f7213e01bbcbe99762c0b2f002ebcf35ffa7a8c629
|
7
|
+
data.tar.gz: 671bce8dd2b8f991b171bcf247d7bfe705522614aff0649308cb36030f66bc5680eceb7bf7a475a4a3dd5db014c493caeef9bc222aa3c4928f0a79454a387f9a
|
data/.gitignore
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
# Ignore Byebug command history file.
|
17
|
+
.byebug_history
|
18
|
+
|
19
|
+
## Specific to RubyMotion:
|
20
|
+
.dat*
|
21
|
+
.repl_history
|
22
|
+
build/
|
23
|
+
*.bridgesupport
|
24
|
+
build-iPhoneOS/
|
25
|
+
build-iPhoneSimulator/
|
26
|
+
|
27
|
+
## Specific to RubyMotion (use of CocoaPods):
|
28
|
+
#
|
29
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
30
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
31
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
32
|
+
#
|
33
|
+
# vendor/Pods/
|
34
|
+
|
35
|
+
## Documentation cache and generated files:
|
36
|
+
/.yardoc/
|
37
|
+
/_yardoc/
|
38
|
+
/doc/
|
39
|
+
/rdoc/
|
40
|
+
|
41
|
+
## Environment normalization:
|
42
|
+
/.bundle/
|
43
|
+
/vendor/bundle
|
44
|
+
/lib/bundler/man/
|
45
|
+
|
46
|
+
# for a library or gem, you might want to ignore these files since the code is
|
47
|
+
# intended to run in multiple environments; otherwise, check them in:
|
48
|
+
Gemfile.lock
|
49
|
+
# .ruby-version
|
50
|
+
# .ruby-gemset
|
51
|
+
|
52
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
53
|
+
.rvmrc
|
54
|
+
|
55
|
+
# Used by RuboCop. Remote config files pulled in from inherit_from directive.
|
56
|
+
# .rubocop-https?--*
|
57
|
+
*.so
|
@@ -0,0 +1,18 @@
|
|
1
|
+
{
|
2
|
+
"configurations": [
|
3
|
+
{
|
4
|
+
"name": "Linux",
|
5
|
+
"includePath": [
|
6
|
+
"${workspaceFolder}/**",
|
7
|
+
"/home/delton/.rvm/src/ruby-2.7.1/include",
|
8
|
+
"/home/delton/.rvm/rubies/ruby-2.7.1/include/ruby-2.7.0/x86_64-linux/"
|
9
|
+
],
|
10
|
+
"defines": [],
|
11
|
+
"compilerPath": "/usr/bin/gcc",
|
12
|
+
"cStandard": "c11",
|
13
|
+
"cppStandard": "c++17",
|
14
|
+
"intelliSenseMode": "clang-x64"
|
15
|
+
}
|
16
|
+
],
|
17
|
+
"version": 4
|
18
|
+
}
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
BSD 3-Clause License
|
2
|
+
|
3
|
+
Copyright (c) 2020, Delton Ding
|
4
|
+
All rights reserved.
|
5
|
+
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
8
|
+
|
9
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
10
|
+
list of conditions and the following disclaimer.
|
11
|
+
|
12
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
14
|
+
and/or other materials provided with the distribution.
|
15
|
+
|
16
|
+
3. Neither the name of the copyright holder nor the names of its
|
17
|
+
contributors may be used to endorse or promote products derived from
|
18
|
+
this software without specific prior written permission.
|
19
|
+
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
21
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
22
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
23
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
24
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
25
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
26
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
27
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
28
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# prefix-machine
|
2
|
+
A high-performance prefix-matching machine for Ruby.
|
3
|
+
|
4
|
+
```ruby
|
5
|
+
require 'prefix-machine'
|
6
|
+
|
7
|
+
machine = PrefixMachine.new
|
8
|
+
machine << 'hello.'
|
9
|
+
machine << 'fork.ai.'
|
10
|
+
machine << 'fork.human.'
|
11
|
+
machine.match('hello.world') # => 'hello.'
|
12
|
+
machine.match('fork.ai.sdk') # => 'fork.ai.'
|
13
|
+
machine.match('fork.human.resources') # => 'fork.human.'
|
14
|
+
machine.match('refute') # => nil
|
15
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
3
|
+
require 'rake/extensiontask'
|
4
|
+
|
5
|
+
# Build tasks for the native extension, driven by the gem's own specification.
gemspec = Gem::Specification.load('prefix-machine.gemspec')

Rake::ExtensionTask.new('prefix_machine_ext', gemspec) { |ext| ext.ext_dir = "ext/prefix_machine" }

# `rake test` runs every *_test.rb under test/ with test/ and lib/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push("test", "lib")
  t.test_files = FileList["test/**/*_test.rb"]
end

task default: :test
|
@@ -0,0 +1,136 @@
|
|
1
|
+
#include "prefix_machine.h"
|
2
|
+
|
3
|
+
void Init_prefix_machine_ext() {
|
4
|
+
kPrefixMachine = rb_define_class("PrefixMachine", rb_cObject);
|
5
|
+
kPrefixMachineTrie = rb_define_class("PrefixMachineTrie", rb_cObject);
|
6
|
+
|
7
|
+
rb_define_method(kPrefixMachine, "initialize", method_prefix_machine_initialize, 0);
|
8
|
+
rb_define_private_method(kPrefixMachine, "trie_insert", method_private_prefix_machine_insert_trie, 1);
|
9
|
+
rb_define_private_method(kPrefixMachine, "trie_match", method_private_prefix_machine_match, 1);
|
10
|
+
}
|
11
|
+
|
12
|
+
// PrefixMachine#initialize: builds an empty root trie node, wraps it in a
// PrefixMachineTrie object stored in @rules, and resets @count to zero.
// Returns self.
VALUE method_prefix_machine_initialize(VALUE self) {
  struct trie* root = xmalloc(sizeof(struct trie));

  // The root represents the empty prefix, so its trace is the empty string.
  root->trace = xmalloc(sizeof(char));
  root->trace[0] = '\0';
  root->matched = FALSE;

  size_t slot = TOKENS;
  while (slot-- > 0) {
    root->children[slot] = NULL;
  }

  // From here on the wrapper object owns the trie (freed via type_trie.dfree).
  VALUE wrapped = TypedData_Wrap_Struct(kPrefixMachineTrie, &type_trie, root);
  rb_iv_set(self, "@rules", wrapped);
  rb_iv_set(self, "@count", SIZET2NUM(0));

  return self;
}
|
29
|
+
|
30
|
+
// PrefixMachine#trie_insert (private): adds `prefix` to the trie held in
// @rules and increments @count by one. Always returns nil.
// StringValueCStr raises if `prefix` is not string-like or contains a NUL byte.
VALUE method_private_prefix_machine_insert_trie(VALUE self, VALUE prefix) {
  VALUE wrapped_rules = rb_iv_get(self, "@rules");
  struct trie* root = internal_trie_get(wrapped_rules);

  char* rule = StringValueCStr(prefix);
  internal_prefix_machine_insert_trie(root, rule, strlen(rule), 0);

  // @count counts insert calls, including repeated inserts of the same rule.
  size_t previous = NUM2SIZET(rb_iv_get(self, "@count"));
  rb_iv_set(self, "@count", SIZET2NUM(previous + 1));

  return Qnil;
}
|
38
|
+
|
39
|
+
// PrefixMachine#trie_match (private): looks `str` up in the trie held in
// @rules. Returns a new Ruby String with the matched prefix, or nil when no
// stored prefix starts `str`.
VALUE method_private_prefix_machine_match(VALUE self, VALUE str) {
  struct trie* root = internal_trie_get(rb_iv_get(self, "@rules"));
  char* subject = StringValueCStr(str);
  char* hit = internal_prefix_machine_match(root, subject, strlen(subject), 0);

  return hit == NULL ? Qnil : rb_str_new2(hit);
}
|
51
|
+
|
52
|
+
size_t internal_compact_tokens(char c) {
|
53
|
+
if (c >= '0' && c <= '9') {
|
54
|
+
return c - '0';
|
55
|
+
}
|
56
|
+
|
57
|
+
if (c >= 'a' && c <= 'z') {
|
58
|
+
return c - 'a' + 11;
|
59
|
+
}
|
60
|
+
|
61
|
+
if (c == '.') {
|
62
|
+
return 36;
|
63
|
+
}
|
64
|
+
|
65
|
+
rb_raise(rb_eNoMatchingPatternError, "Pattern Not Supported.");
|
66
|
+
}
|
67
|
+
|
68
|
+
void internal_prefix_machine_insert_trie(struct trie* node, char* rule, size_t len, size_t offset) {
|
69
|
+
// Check boundary first
|
70
|
+
if (len == offset) {
|
71
|
+
node->matched = TRUE;
|
72
|
+
return;
|
73
|
+
}
|
74
|
+
|
75
|
+
char c = rule[offset];
|
76
|
+
size_t index = internal_compact_tokens(c);
|
77
|
+
if (node->children[index] != NULL) {
|
78
|
+
return internal_prefix_machine_insert_trie(node->children[index], rule, len, offset + 1);
|
79
|
+
}
|
80
|
+
|
81
|
+
// Create the node
|
82
|
+
struct trie* new_node = xmalloc(sizeof(struct trie));
|
83
|
+
char* trace = xmalloc(sizeof(char) * (offset + 2));
|
84
|
+
memcpy(trace, rule, offset);
|
85
|
+
trace[offset] = c;
|
86
|
+
trace[offset + 1] = '\0'; // NUL-terminated
|
87
|
+
new_node->trace = trace;
|
88
|
+
new_node->matched = FALSE;
|
89
|
+
|
90
|
+
for (size_t i = 0; i < TOKENS; i++) {
|
91
|
+
new_node->children[i] = NULL;
|
92
|
+
}
|
93
|
+
|
94
|
+
node->children[index] = new_node;
|
95
|
+
|
96
|
+
return internal_prefix_machine_insert_trie(new_node, rule, len, offset + 1);
|
97
|
+
}
|
98
|
+
|
99
|
+
char* internal_prefix_machine_match(struct trie* node, char* str, size_t len, size_t offset) {
|
100
|
+
if (node->matched == TRUE) {
|
101
|
+
return node->trace;
|
102
|
+
}
|
103
|
+
|
104
|
+
if (len == offset) {
|
105
|
+
return NULL;
|
106
|
+
}
|
107
|
+
|
108
|
+
size_t index = internal_compact_tokens(str[offset]);
|
109
|
+
|
110
|
+
if (node->children[index] == NULL) {
|
111
|
+
return NULL;
|
112
|
+
}
|
113
|
+
|
114
|
+
return internal_prefix_machine_match(node->children[index], str, len, offset + 1);
|
115
|
+
}
|
116
|
+
|
117
|
+
// Unwraps the struct trie stored in a PrefixMachineTrie object.
// The type check raises TypeError when `wrapped` does not carry type_trie data.
struct trie* internal_trie_get(VALUE wrapped) {
  return (struct trie*) rb_check_typeddata(wrapped, &type_trie);
}
|
122
|
+
|
123
|
+
void internal_trie_free(void* ptr) {
|
124
|
+
struct trie* root = (struct trie*) ptr;
|
125
|
+
for (size_t i = 0; i < TOKENS; i++) {
|
126
|
+
if (root->children[i] != NULL) {
|
127
|
+
internal_trie_free(root->children[i]);
|
128
|
+
}
|
129
|
+
}
|
130
|
+
xfree(root->trace);
|
131
|
+
xfree(root);
|
132
|
+
}
|
133
|
+
|
134
|
+
// dsize callback for type_trie. Deliberately reports 0: computing the real
// size would need an O(n) walk over the whole trie, which is too costly.
size_t internal_trie_size(const void* ptr) {
  (void) ptr; // intentionally unused
  return 0;
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
// Declarations for the prefix_machine_ext C extension.
//
// NOTE: this header *defines* objects (the two class handles and type_trie),
// so it is meant to be included by a single translation unit only.
#ifndef PREFIX_MACHINE_H
#define PREFIX_MACHINE_H

#include <ruby.h>

#define TOKENS 37 // child slots per node: 10 digits + 26 lowercase letters + dot

// Guarded so that a definition supplied by another header is not redefined.
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

// One trie node.
struct trie {
  int matched;                   // TRUE when a stored prefix ends at this node
  char* trace;                   // NUL-terminated prefix leading to this node
  struct trie* children[TOKENS]; // indexed by internal_compact_tokens(), NULL when absent
};

// Ruby class handles, assigned in Init_prefix_machine_ext.
VALUE kPrefixMachine = Qnil;
VALUE kPrefixMachineTrie = Qnil;

void Init_prefix_machine_ext();
VALUE method_prefix_machine_initialize(VALUE self);

// private methods
VALUE method_private_prefix_machine_insert_trie(VALUE self, VALUE prefix);
VALUE method_private_prefix_machine_match(VALUE self, VALUE str);

// internal methods
size_t internal_compact_tokens(char c);
void internal_prefix_machine_insert_trie(struct trie* node, char* rule, size_t len, size_t offset);
char* internal_prefix_machine_match(struct trie* node, char* str, size_t len, size_t offset);

struct trie* internal_trie_get(VALUE wrapped);
void internal_trie_free(void* ptr);
size_t internal_trie_size(const void* ptr);

// TypedData descriptor used to wrap a struct trie in a Ruby object.
// No mark function is needed because the trie holds no Ruby VALUEs.
static const rb_data_type_t type_trie = {
  .wrap_struct_name = "trie",
  .function = {
    .dmark = NULL,
    .dfree = internal_trie_free,
    .dsize = internal_trie_size,
  },
  .data = NULL,
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
};

#endif // PREFIX_MACHINE_H
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Compiled C extension, found through the load path.
require 'prefix_machine_ext'
# Resolved relative to this file rather than the process working directory,
# so the version file is found wherever the gem is installed.
require_relative 'prefix-machine/version'
|
3
|
+
|
4
|
+
# Ruby-side front end of the prefix matcher. Input is lower-cased and checked
# against the supported alphabet before being handed to the private
# +trie_insert+ / +trie_match+ methods, which this class expects to be
# provided elsewhere.
class PrefixMachine
  # Value of @count (not assigned in this class body).
  attr_reader :count

  # Ensures +str+ consists solely of digits, lowercase ASCII letters and dots.
  #
  # The pattern is anchored with \A and \z so the whole string is checked; an
  # unanchored `*` pattern matches the empty string at position 0 of any input
  # and would therefore never reject anything.
  #
  # @param str [String] already lower-cased input
  # @return [nil]
  # @raise [NoMatchingPatternError] if +str+ contains an unsupported character
  def guard!(str)
    return if str.match?(/\A[0-9a-z.]*\z/)

    raise NoMatchingPatternError, 'No Such Patterns'
  end

  # Registers +rule+ (case-insensitively) as a prefix to match against.
  #
  # @param rule [String]
  # @return [Object] whatever +trie_insert+ returns
  # @raise [NoMatchingPatternError] if +rule+ contains an unsupported character
  def insert(rule)
    s = rule.downcase
    guard!(s)
    trie_insert(s)
  end

  # Looks +str+ up (case-insensitively) among the registered prefixes.
  #
  # @param str [String]
  # @return [Object] whatever +trie_match+ returns
  # @raise [NoMatchingPatternError] if +str+ contains an unsupported character
  def match(str)
    s = str.downcase
    guard!(s)
    trie_match(s)
  end

  alias_method :<<, :insert
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative 'lib/prefix-machine/version'
|
2
|
+
|
3
|
+
# Gem specification for prefix-machine (native extension built via extconf.rb).
Gem::Specification.new do |spec|
  spec.name = "prefix-machine"
  spec.version = PrefixMachine::VERSION
  spec.authors = ["Delton Ding"]
  spec.email = ["dsh0416@gmail.com"]

  spec.summary = "The gem to match class prefixes."
  spec.description = "The gem to match class prefixes."
  spec.homepage = "https://github.com/dsh0416/prefix-machine"
  spec.license = 'BSD-3-Clause'
  spec.required_ruby_version = '>= 2.7.1'

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/dsh0416/prefix-machine"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # Tests and development-only files (editor settings with machine-local paths,
  # .gitignore) are left out of the packaged gem.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      f.match?(%r{\A(?:test|spec|features|\.vscode)/}) || f == '.gitignore'
    end
  end
  spec.require_paths = ["lib"]
  spec.extensions = ['ext/prefix_machine/extconf.rb']

  spec.add_development_dependency 'rake-compiler', '~> 1.0'
end
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: prefix-machine
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Delton Ding
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-09-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake-compiler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
description: The gem to match class prefixes.
|
28
|
+
email:
|
29
|
+
- dsh0416@gmail.com
|
30
|
+
executables: []
|
31
|
+
extensions:
|
32
|
+
- ext/prefix_machine/extconf.rb
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- ".gitignore"
|
36
|
+
- ".vscode/c_cpp_properties.json"
|
37
|
+
- Gemfile
|
38
|
+
- LICENSE
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- ext/prefix_machine/extconf.rb
|
42
|
+
- ext/prefix_machine/prefix_machine.c
|
43
|
+
- ext/prefix_machine/prefix_machine.h
|
44
|
+
- lib/prefix-machine.rb
|
45
|
+
- lib/prefix-machine/version.rb
|
46
|
+
- prefix-machine.gemspec
|
47
|
+
homepage: https://github.com/dsh0416/prefix-machine
|
48
|
+
licenses:
|
49
|
+
- BSD-3-Clause
|
50
|
+
metadata:
|
51
|
+
homepage_uri: https://github.com/dsh0416/prefix-machine
|
52
|
+
source_code_uri: https://github.com/dsh0416/prefix-machine
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.7.1
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
requirements: []
|
68
|
+
rubygems_version: 3.1.2
|
69
|
+
signing_key:
|
70
|
+
specification_version: 4
|
71
|
+
summary: The gem to match class prefixes.
|
72
|
+
test_files: []
|