melisa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -0
- data/ext/marisa/bindings/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
- data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/python/marisa-swig.h +183 -0
- data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
- data/ext/marisa/bindings/ruby/extconf.rb +5 -0
- data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
- data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
- data/ext/marisa/lib/marisa.h +14 -0
- data/ext/marisa/lib/marisa/agent.cc +51 -0
- data/ext/marisa/lib/marisa/agent.h +73 -0
- data/ext/marisa/lib/marisa/base.h +193 -0
- data/ext/marisa/lib/marisa/exception.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
- data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
- data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
- data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
- data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
- data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
- data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
- data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
- data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
- data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
- data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
- data/ext/marisa/lib/marisa/iostream.h +18 -0
- data/ext/marisa/lib/marisa/key.h +85 -0
- data/ext/marisa/lib/marisa/keyset.cc +181 -0
- data/ext/marisa/lib/marisa/keyset.h +80 -0
- data/ext/marisa/lib/marisa/query.h +71 -0
- data/ext/marisa/lib/marisa/scoped-array.h +48 -0
- data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
- data/ext/marisa/lib/marisa/stdio.h +15 -0
- data/ext/marisa/lib/marisa/trie.cc +249 -0
- data/ext/marisa/lib/marisa/trie.h +64 -0
- data/ext/marisa/tests/base-test.cc +309 -0
- data/ext/marisa/tests/io-test.cc +252 -0
- data/ext/marisa/tests/marisa-assert.h +26 -0
- data/ext/marisa/tests/marisa-test.cc +388 -0
- data/ext/marisa/tests/trie-test.cc +507 -0
- data/ext/marisa/tests/vector-test.cc +466 -0
- data/ext/marisa/tools/cmdopt.cc +298 -0
- data/ext/marisa/tools/cmdopt.h +58 -0
- data/ext/marisa/tools/marisa-benchmark.cc +418 -0
- data/ext/marisa/tools/marisa-build.cc +206 -0
- data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
- data/ext/marisa/tools/marisa-dump.cc +151 -0
- data/ext/marisa/tools/marisa-lookup.cc +110 -0
- data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
- data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
- data/lib/melisa.rb +7 -0
- data/lib/melisa/base_config_flags.rb +76 -0
- data/lib/melisa/bytes_trie.rb +55 -0
- data/lib/melisa/int_trie.rb +14 -0
- data/lib/melisa/search.rb +55 -0
- data/lib/melisa/trie.rb +96 -0
- data/lib/melisa/version.rb +3 -0
- data/melisa.gemspec +36 -0
- data/spec/base_config_flags_spec.rb +73 -0
- data/spec/bytes_trie_spec.rb +16 -0
- data/spec/int_trie_spec.rb +16 -0
- data/spec/search_spec.rb +29 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/trie_spec.rb +30 -0
- metadata +207 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
module Melisa
  # Default byte inserted between a key and its value inside the underlying
  # trie. Built as a frozen binary string so it can be shared safely between
  # instances.
  VALUE_SEPARATOR = [0xff].pack('C').freeze

  # A Trie that maps String keys to String values.
  #
  # Each pair is stored in the underlying Marisa trie as the single entry
  # `key + separator + value`. Lookups run a predictive (prefix) search for
  # `key + separator` and cut the value back out of the matching entry.
  # Keys are assumed not to contain the separator byte.
  class BytesTrie < Trie
    # @param hash      [Hash{String=>String}] initial key/value pairs
    # @param separator [String] byte sequence placed between key and value
    # @param opts      [Hash] build options, handed to Trie#initialize
    def initialize(hash={}, separator=VALUE_SEPARATOR, opts={})
      super([], [], opts)

      # Work on a binary copy: the caller's string (by default the shared
      # VALUE_SEPARATOR constant) must not be re-encoded in place.
      @sep = binary(separator)
      @sep_c = @sep.ord

      add_many(hash, [])
    end

    # Add every key/value pair of +hash+ to the trie.
    # +weights+ is accepted only to keep the Trie#add_many signature and is
    # ignored.
    def add_many(hash, weights)
      hash.each { |key, value| push(raw_key(key, value)) }
    end

    # @return [Boolean] true when some stored entry starts with
    #   `key + separator`, i.e. a value was stored under +key+.
    #   (An exact lookup of `key + separator` would only ever match entries
    #   whose value is empty, so a prefix search is used instead.)
    def include?(key)
      build unless @built
      @trie.predictive_search(agent_for(binary(key) + @sep))
    end

    # @return [Object, nil] the value of the first entry found for +key+,
    #   or nil when there is none.
    def get(key)
      build unless @built
      agent = agent_for(binary(key) + @sep)
      agent_key_value(agent) if @trie.predictive_search(agent)
    end
    alias :[] :get

    # Search for many results with a given prefix.
    # @return [Array] the values of every entry whose key starts with +key+
    def get_all(key)
      build unless @built
      agent = agent_for(binary(key))
      results = []
      results << agent_key_value(agent) while @trie.predictive_search(agent)
      results
    end

    protected

    # Entry actually stored in the trie for a key/value pair.
    def raw_key(key, value)
      binary(key) + @sep + binary(value)
    end

    # Extract the value part of the entry the agent currently points at.
    # The limit of 2 splits on the first separator only, so a value that
    # itself contains the separator byte, or is empty, survives intact.
    def agent_key_value(agent)
      binary(agent.key_str).split(@sep, 2)[1]
    end

    # Binary-encoded copy of +str+, so strings in different encodings can be
    # concatenated and compared byte-wise without compatibility errors.
    def binary(str)
      str.dup.force_encoding('binary')
    end

    # Fresh search agent primed with +query+.
    def agent_for(query)
      Marisa::Agent.new.tap { |agent| agent.set_query(query) }
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module Melisa
  # A BytesTrie whose values are Integers.
  #
  # Values are serialized with the `i` pack directive (a native-endian signed
  # int), so the stored bytes follow the platform's int layout.
  class IntTrie < BytesTrie
    protected

    # Entry stored in the trie: key, separator, then the packed integer.
    def raw_key(key, value)
      key + @sep + [value.to_i].pack('i*')
    end

    # Decode the integer stored in the entry the agent currently points at.
    #
    # The split is limited to two parts because the packed bytes may contain
    # the separator byte themselves (e.g. 255 or any negative number with the
    # default 0xff separator). An unlimited split would cut the value apart
    # and drop trailing empty pieces, yielding nil or a wrong number.
    #
    # @return [Integer, nil] nil when the entry carries no value bytes
    def agent_key_value(agent)
      packed = agent.key_str.split(@sep, 2)[1]
      packed.unpack('i*').first if packed
    end
  end
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
module Melisa
  # Enumerates the keys of a trie that start with a given prefix.
  #
  # +trie+ is any object whose #trie method returns the underlying Marisa
  # trie (it must answer predictive_search, common_prefix_search and lookup).
  class Search
    include Enumerable

    # @param trie   [#trie] wrapper around the Marisa trie
    # @param prefix [String] prefix every enumerated key starts with
    def initialize(trie, prefix)
      @trie = trie
      @prefix = prefix
    end

    # Narrow the search further.
    # @return [Search] a new search for the current prefix extended by +prefix+
    def search(prefix)
      Search.new(@trie, @prefix + prefix)
    end

    # Reset the shared agent state so a predictive search over it starts from
    # the first key again. Kept for callers that drive @agent themselves; the
    # methods below each use a private agent so they can be nested safely.
    def reset_agent
      @agent = Marisa::Agent.new
      @agent.set_query(@prefix)
    end

    # Yield each key that starts with the prefix.
    # Every call iterates with its own agent, so calling #each (or any
    # Enumerable method) from inside the block does not disturb the outer
    # iteration.
    # @return [Enumerator, nil] an Enumerator when no block is given
    def each
      return enum_for(:each) unless block_given?

      agent = new_agent
      yield agent.key_str while @trie.trie.predictive_search(agent)
    end

    # @return [Integer] number of keys that start with the prefix
    def size
      keys.size
    end

    # @return [Array<String>] all keys that start with the prefix (memoized)
    def keys
      @keys ||= to_a
    end

    # @return [Boolean] true when at least one key starts with the prefix
    def has_keys?
      @trie.trie.predictive_search(new_agent)
    end

    # @return [Boolean] true when +key+ is stored in the trie and lies within
    #   this search, i.e. starts with the prefix. Without the prefix check a
    #   narrowed search would report keys that #each never yields.
    def include?(key)
      return false unless under_prefix?(key)

      agent = Marisa::Agent.new
      agent.set_query(key)
      @trie.trie.lookup(agent)
    end

    # Yield each stored key found by a common-prefix search for the search
    # prefix (keys that are themselves prefixes of it).
    # @return [Enumerator, nil] an Enumerator when no block is given
    def with_prefixes
      return enum_for(:with_prefixes) unless block_given?

      agent = new_agent
      yield agent.key_str while @trie.trie.common_prefix_search(agent)
    end

    private

    # Fresh agent primed with the search prefix.
    def new_agent
      Marisa::Agent.new.tap { |agent| agent.set_query(@prefix) }
    end

    # Byte-wise "starts with", independent of the strings' encodings.
    def under_prefix?(key)
      prefix_bytes = @prefix.bytes.to_a
      key.bytes.first(prefix_bytes.size) == prefix_bytes
    end
  end
end
|
data/lib/melisa/trie.rb
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
require "melisa/base_config_flags"
|
|
2
|
+
require "melisa/search"
|
|
3
|
+
|
|
4
|
+
module Melisa
  # Raised when a key is added to a Trie that has already been built.
  ImmutableError = Class.new(StandardError)

  # Ruby-style wrapper around Marisa::Trie.
  #
  # Keys are collected in a Marisa::Keyset until the trie is built. Building
  # happens explicitly through #build or implicitly on the first query or
  # save. Once built (or loaded from disk) the trie no longer accepts keys.
  class Trie
    include Enumerable
    include BaseConfigFlags

    attr_reader :trie

    # Initialize a Trie.
    # @keys     An array of UTF-8 strings
    # @weights  An array of corresponding weights
    # @opts
    #   :binary      Boolean, true for a binary Trie, false for text
    #   :num_tries   An integer from 1 to 127 representing the depth of recursive Tries
    #   :cache_size  One of [:tiny, :small, :normal, :large, :huge]
    #   :order       One of [:label, :weight]
    def initialize(keys=[], weights=[], opts={})
      @trie = Marisa::Trie.new
      @keyset = Marisa::Keyset.new
      @options = opts
      @built = false

      add_many(keys, weights)
    end

    # Build the underlying trie from the collected keys. Does nothing if the
    # trie has already been built.
    def build
      @trie.build(@keyset, config_flags(@options)) unless @built
      @built = true
    end

    # Add a single key with an optional weight.
    # @raise [ImmutableError] if the trie has already been built
    # @return [Trie] self, so calls can be chained (`trie << 'a' << 'b'`)
    def add(key, weight=nil)
      push(key, weight)
      self
    end
    alias :<< :add

    # Add several keys at once; +weights+ is matched to +keys+ by position
    # and may be shorter than +keys+ (missing weights are left unset).
    # @raise [ImmutableError] if the trie has already been built
    def add_many(keys, weights)
      keys.zip(weights).each { |key, weight| push(key, weight) }
    end

    # @return [Search] a search over all keys that start with +prefix+
    def search(prefix)
      build unless @built
      Search.new(self, prefix)
    end

    # Yield every key in the trie.
    # @return [Enumerator] when no block is given
    def each(&block)
      return enum_for(:each) unless block

      search('').each(&block)
    end

    # @return [Integer] number of keys, as reported by the underlying trie
    def size
      build unless @built
      @trie.num_keys()
    end

    # @return [Array<String>] every key in the trie
    def keys
      search('').keys
    end

    # @return [Boolean] true when the trie holds at least one key
    def has_keys?
      search('').has_keys?
    end

    # @return [Boolean] true when +key+ is stored in the trie
    def include?(key)
      search('').include?(key)
    end

    # Load a previously saved trie from +path+. The trie counts as built
    # afterwards, so no further keys can be added.
    # @return [Trie] self
    def load(path)
      @trie.load(path)
      @built = true
      self
    end

    # Build the trie if necessary and write it to +path+.
    # @return [Trie] self
    def save(path)
      build unless @built
      @trie.save(path)
      self
    end

    protected

    # Queue a key (and optional weight) in the keyset.
    # The immutability check lives here so every way of adding keys fails
    # loudly after the trie is built, rather than queueing keys that never
    # make it into the trie.
    def push(key, weight=nil)
      raise ImmutableError, "Can't add #{key}, Trie already built" if @built

      if weight
        @keyset.push_back(key, weight)
      else
        @keyset.push_back(key)
      end
    end
  end
end
|
data/melisa.gemspec
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
|
+
require 'melisa/version'
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |spec|
  # --- Identity -----------------------------------------------------------
  spec.name        = "melisa"
  spec.summary     = "Melisa is a Rubyesque wrapper for the Marisa Trie C library"
  spec.description = "While marisa-trie provides a ruby binding, it is not particularly rubyesque. Melisa fixes that."
  spec.homepage    = "http://github.com/wordtreefoundation/melisa"
  spec.authors     = ['Duane Johnson']
  spec.email       = ['duane.johnson@gmail.com']
  spec.licenses    = ["MIT"]

  # --- Packaged files -----------------------------------------------------
  # Ruby sources and specs first, then the bundled marisa C/C++ sources and
  # the extconf scripts. Patterns are expanded in this order.
  packaged_patterns = [
    "lib/**/*.rb",
    "spec/**/*",
    "ext/**/*.h",
    "ext/**/*.cc",
    "ext/**/*.cxx",
    "ext/**/extconf.rb"
  ]
  packaged = %w[melisa.gemspec README.md]
  packaged_patterns.each { |pattern| packaged += Dir.glob(pattern) }
  spec.files = packaged

  # --- Native extension ---------------------------------------------------
  spec.platform   = Gem::Platform::RUBY
  spec.extensions = Dir.glob('ext/**/extconf.rb')

  spec.test_files            = Dir.glob("spec/**/*")
  spec.require_paths         = ["lib", "ext"]
  spec.version               = Melisa::VERSION
  spec.required_ruby_version = '>= 1.9.0'

  # --- Development-only dependencies --------------------------------------
  spec.add_development_dependency "rake"
  spec.add_development_dependency "bundler", ">= 1.0.0"
  spec.add_development_dependency "rspec", "~> 2.6"
  spec.add_development_dependency "debugger"
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
require_relative "spec_helper"
|
|
2
|
+
|
|
3
|
+
# Bare host object: mixes the flag helpers into an otherwise empty class so
# the examples below can call each helper directly.
class ConfigHarness
  include Melisa::BaseConfigFlags
end

describe Melisa::BaseConfigFlags do
  let(:config) { ConfigHarness.new }

  context "config_flags" do
    it "builds an integer" do
      flags = config.config_flags
      flags.should be_a(Integer)
    end

    it "accepts a hash of options" do
      # Literal flag words expected for a text trie and a binary trie.
      text_flags = config.config_flags(:binary => false)
      text_flags.should == 135683

      binary_flags = config.config_flags(:binary => true)
      binary_flags.should == 139779
    end
  end

  context "binary_flag" do
    it "accepts boolean" do
      config.binary_flag(true).should == Marisa::BINARY_TAIL
      config.binary_flag(false).should == Marisa::TEXT_TAIL

      # Anything that is not strictly true/false is rejected.
      non_boolean = proc { config.binary_flag(nil) }
      non_boolean.should raise_error(ArgumentError)
    end
  end

  context "valid_num_tries" do
    it "must be >= MIN" do
      lower_bound = Marisa::MIN_NUM_TRIES
      below = proc { config.valid_num_tries(lower_bound-1) }
      at    = proc { config.valid_num_tries(lower_bound) }

      below.should raise_error
      at.should_not raise_error
    end

    it "must be <= MAX" do
      upper_bound = Marisa::MAX_NUM_TRIES
      above = proc { config.valid_num_tries(upper_bound+1) }
      at    = proc { config.valid_num_tries(upper_bound) }

      above.should raise_error
      at.should_not raise_error
    end

    it "accepts values between MIN and MAX" do
      in_range = Marisa::MIN_NUM_TRIES+1
      config.valid_num_tries(in_range).should == in_range
    end

    it "accepts special :default symbol" do
      config.valid_num_tries(:default).should == Marisa::DEFAULT_NUM_TRIES
    end
  end

  context "lookup_cache_size" do
    it "must be a valid size" do
      known   = proc { config.lookup_cache_size(:tiny) }
      unknown = proc { config.lookup_cache_size(:eentsy_weentsy) }

      known.should_not raise_error
      unknown.should raise_error
    end

    it "returns an integer" do
      config.lookup_cache_size(:tiny).should be_a(Integer)
    end
  end

  context "valid_node_order" do
    it "must be a valid node order" do
      known   = proc { config.valid_node_order(:weight) }
      unknown = proc { config.valid_node_order(:upside_down) }

      known.should_not raise_error
      unknown.should raise_error
    end

    it "returns an integer" do
      config.valid_node_order(:weight).should be_a(Integer)
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
require_relative 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe Melisa::BytesTrie do
  # 'one' is a prefix of 'onetwo', which exercises both the exact and the
  # prefix lookups.
  let(:hash) { {'one' => '1', 'two' => '2', 'onetwo' => '3'} }
  let(:trie) { Melisa::BytesTrie.new(hash) }

  it "stores values" do
    one, two, onetwo = trie['one'], trie['two'], trie['onetwo']

    one.should == '1'
    two.should == '2'
    onetwo.should == '3'
  end

  it "retreives many values by prefix" do
    # Order is not asserted: =~ matches arrays regardless of order.
    matches = trie.get_all('one')
    matches.should =~ ['1', '3']
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
require_relative 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe Melisa::IntTrie do
  # 'one' is a prefix of 'onetwo', which exercises both the exact and the
  # prefix lookups.
  let(:hash) { {'one' => 1, 'two' => 2, 'onetwo' => 3} }
  let(:trie) { Melisa::IntTrie.new(hash) }

  it "stores values" do
    one, two, onetwo = trie['one'], trie['two'], trie['onetwo']

    one.should == 1
    two.should == 2
    onetwo.should == 3
  end

  it "retreives many values by prefix" do
    # Order is not asserted: =~ matches arrays regardless of order.
    matches = trie.get_all('one')
    matches.should =~ [1, 3]
  end
end
|
data/spec/search_spec.rb
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require_relative 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe Melisa::Search do
  let(:keys) { ['one', 'two', 'onetwo'] }
  let(:trie) { Melisa::Trie.new(keys) }

  it "iterates with each" do
    collected = []
    trie.each { |key| collected << key }
    collected.should =~ keys
  end

  it "implements enumerable methods" do
    every_key_has_o = trie.all? { |k| k.include? 'o' }
    some_key_has_z  = trie.any? { |k| k.include? 'z' }
    upcased         = trie.map { |k| k.upcase }

    every_key_has_o.should be_true
    some_key_has_z.should_not be_true
    upcased.should =~ ['ONE', 'TWO', 'ONETWO']
  end

  it "narrows the search" do
    narrowed = trie.search('one')

    narrowed.should be_a(Melisa::Search)
    narrowed.size.should == 2
    narrowed.keys.should =~ ['one', 'onetwo']
    # Disabled assertion, kept as found:
    # subset.map { |k| k.upcase }.should =~ ['ONE', 'ONETWO']
  end

end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require 'melisa'
|
data/spec/trie_spec.rb
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require_relative "spec_helper"
|
|
2
|
+
require 'tempfile'
|
|
3
|
+
|
|
4
|
+
describe Melisa::Trie do
  let(:terms) { ['one', 'two', 'onetwo'] }
  let(:trie) { Melisa::Trie.new(terms) }

  # Smoke test: constructing the trie must not raise.
  it "initializes" do
    trie
  end

  it "tests for inclusion" do
    trie.include?('one').should be_true
    trie.include?('three').should_not be_true
  end

  it "lists keys" do
    trie.keys.should =~ ['one', 'two', 'onetwo']
  end

  # Round trip: save to disk, load into a fresh trie, compare the keys.
  it "saves" do
    tmp = Tempfile.new('melisa')
    begin
      trie.save(tmp.path)

      trie2 = Melisa::Trie.new
      trie2.load(tmp.path)

      trie2.keys.should =~ ['one', 'two', 'onetwo']
    ensure
      # Close and delete the temp file now, even when an expectation fails,
      # instead of leaving it to the garbage collector.
      tmp.close!
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: melisa
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- Duane Johnson
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2014-03-30 00:00:00.000000000 Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: rake
|
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
|
17
|
+
none: false
|
|
18
|
+
requirements:
|
|
19
|
+
- - ! '>='
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '0'
|
|
22
|
+
type: :development
|
|
23
|
+
prerelease: false
|
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
26
|
+
requirements:
|
|
27
|
+
- - ! '>='
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
version: '0'
|
|
30
|
+
- !ruby/object:Gem::Dependency
|
|
31
|
+
name: bundler
|
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
|
33
|
+
none: false
|
|
34
|
+
requirements:
|
|
35
|
+
- - ! '>='
|
|
36
|
+
- !ruby/object:Gem::Version
|
|
37
|
+
version: 1.0.0
|
|
38
|
+
type: :development
|
|
39
|
+
prerelease: false
|
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
41
|
+
none: false
|
|
42
|
+
requirements:
|
|
43
|
+
- - ! '>='
|
|
44
|
+
- !ruby/object:Gem::Version
|
|
45
|
+
version: 1.0.0
|
|
46
|
+
- !ruby/object:Gem::Dependency
|
|
47
|
+
name: rspec
|
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
|
49
|
+
none: false
|
|
50
|
+
requirements:
|
|
51
|
+
- - ~>
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '2.6'
|
|
54
|
+
type: :development
|
|
55
|
+
prerelease: false
|
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
57
|
+
none: false
|
|
58
|
+
requirements:
|
|
59
|
+
- - ~>
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '2.6'
|
|
62
|
+
- !ruby/object:Gem::Dependency
|
|
63
|
+
name: debugger
|
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
|
65
|
+
none: false
|
|
66
|
+
requirements:
|
|
67
|
+
- - ! '>='
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: '0'
|
|
70
|
+
type: :development
|
|
71
|
+
prerelease: false
|
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
73
|
+
none: false
|
|
74
|
+
requirements:
|
|
75
|
+
- - ! '>='
|
|
76
|
+
- !ruby/object:Gem::Version
|
|
77
|
+
version: '0'
|
|
78
|
+
description: While marisa-trie provides a ruby binding, it is not particularly rubyesque.
|
|
79
|
+
Melisa fixes that.
|
|
80
|
+
email:
|
|
81
|
+
- duane.johnson@gmail.com
|
|
82
|
+
executables: []
|
|
83
|
+
extensions:
|
|
84
|
+
- ext/marisa/bindings/ruby/extconf.rb
|
|
85
|
+
extra_rdoc_files: []
|
|
86
|
+
files:
|
|
87
|
+
- melisa.gemspec
|
|
88
|
+
- README.md
|
|
89
|
+
- lib/melisa/base_config_flags.rb
|
|
90
|
+
- lib/melisa/bytes_trie.rb
|
|
91
|
+
- lib/melisa/int_trie.rb
|
|
92
|
+
- lib/melisa/search.rb
|
|
93
|
+
- lib/melisa/trie.rb
|
|
94
|
+
- lib/melisa/version.rb
|
|
95
|
+
- lib/melisa.rb
|
|
96
|
+
- spec/base_config_flags_spec.rb
|
|
97
|
+
- spec/bytes_trie_spec.rb
|
|
98
|
+
- spec/int_trie_spec.rb
|
|
99
|
+
- spec/search_spec.rb
|
|
100
|
+
- spec/spec_helper.rb
|
|
101
|
+
- spec/trie_spec.rb
|
|
102
|
+
- ext/marisa/bindings/marisa-swig.h
|
|
103
|
+
- ext/marisa/bindings/perl/marisa-swig.h
|
|
104
|
+
- ext/marisa/bindings/python/marisa-swig.h
|
|
105
|
+
- ext/marisa/bindings/ruby/marisa-swig.h
|
|
106
|
+
- ext/marisa/lib/marisa/agent.h
|
|
107
|
+
- ext/marisa/lib/marisa/base.h
|
|
108
|
+
- ext/marisa/lib/marisa/exception.h
|
|
109
|
+
- ext/marisa/lib/marisa/grimoire/algorithm/sort.h
|
|
110
|
+
- ext/marisa/lib/marisa/grimoire/algorithm.h
|
|
111
|
+
- ext/marisa/lib/marisa/grimoire/intrin.h
|
|
112
|
+
- ext/marisa/lib/marisa/grimoire/io/mapper.h
|
|
113
|
+
- ext/marisa/lib/marisa/grimoire/io/reader.h
|
|
114
|
+
- ext/marisa/lib/marisa/grimoire/io/writer.h
|
|
115
|
+
- ext/marisa/lib/marisa/grimoire/io.h
|
|
116
|
+
- ext/marisa/lib/marisa/grimoire/trie/cache.h
|
|
117
|
+
- ext/marisa/lib/marisa/grimoire/trie/config.h
|
|
118
|
+
- ext/marisa/lib/marisa/grimoire/trie/entry.h
|
|
119
|
+
- ext/marisa/lib/marisa/grimoire/trie/header.h
|
|
120
|
+
- ext/marisa/lib/marisa/grimoire/trie/history.h
|
|
121
|
+
- ext/marisa/lib/marisa/grimoire/trie/key.h
|
|
122
|
+
- ext/marisa/lib/marisa/grimoire/trie/louds-trie.h
|
|
123
|
+
- ext/marisa/lib/marisa/grimoire/trie/range.h
|
|
124
|
+
- ext/marisa/lib/marisa/grimoire/trie/state.h
|
|
125
|
+
- ext/marisa/lib/marisa/grimoire/trie/tail.h
|
|
126
|
+
- ext/marisa/lib/marisa/grimoire/trie.h
|
|
127
|
+
- ext/marisa/lib/marisa/grimoire/vector/bit-vector.h
|
|
128
|
+
- ext/marisa/lib/marisa/grimoire/vector/flat-vector.h
|
|
129
|
+
- ext/marisa/lib/marisa/grimoire/vector/pop-count.h
|
|
130
|
+
- ext/marisa/lib/marisa/grimoire/vector/rank-index.h
|
|
131
|
+
- ext/marisa/lib/marisa/grimoire/vector/vector.h
|
|
132
|
+
- ext/marisa/lib/marisa/grimoire/vector.h
|
|
133
|
+
- ext/marisa/lib/marisa/iostream.h
|
|
134
|
+
- ext/marisa/lib/marisa/key.h
|
|
135
|
+
- ext/marisa/lib/marisa/keyset.h
|
|
136
|
+
- ext/marisa/lib/marisa/query.h
|
|
137
|
+
- ext/marisa/lib/marisa/scoped-array.h
|
|
138
|
+
- ext/marisa/lib/marisa/scoped-ptr.h
|
|
139
|
+
- ext/marisa/lib/marisa/stdio.h
|
|
140
|
+
- ext/marisa/lib/marisa/trie.h
|
|
141
|
+
- ext/marisa/lib/marisa.h
|
|
142
|
+
- ext/marisa/tests/marisa-assert.h
|
|
143
|
+
- ext/marisa/tools/cmdopt.h
|
|
144
|
+
- ext/marisa/lib/marisa/agent.cc
|
|
145
|
+
- ext/marisa/lib/marisa/grimoire/io/mapper.cc
|
|
146
|
+
- ext/marisa/lib/marisa/grimoire/io/reader.cc
|
|
147
|
+
- ext/marisa/lib/marisa/grimoire/io/writer.cc
|
|
148
|
+
- ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc
|
|
149
|
+
- ext/marisa/lib/marisa/grimoire/trie/tail.cc
|
|
150
|
+
- ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc
|
|
151
|
+
- ext/marisa/lib/marisa/keyset.cc
|
|
152
|
+
- ext/marisa/lib/marisa/trie.cc
|
|
153
|
+
- ext/marisa/tests/base-test.cc
|
|
154
|
+
- ext/marisa/tests/io-test.cc
|
|
155
|
+
- ext/marisa/tests/marisa-test.cc
|
|
156
|
+
- ext/marisa/tests/trie-test.cc
|
|
157
|
+
- ext/marisa/tests/vector-test.cc
|
|
158
|
+
- ext/marisa/tools/cmdopt.cc
|
|
159
|
+
- ext/marisa/tools/marisa-benchmark.cc
|
|
160
|
+
- ext/marisa/tools/marisa-build.cc
|
|
161
|
+
- ext/marisa/tools/marisa-common-prefix-search.cc
|
|
162
|
+
- ext/marisa/tools/marisa-dump.cc
|
|
163
|
+
- ext/marisa/tools/marisa-lookup.cc
|
|
164
|
+
- ext/marisa/tools/marisa-predictive-search.cc
|
|
165
|
+
- ext/marisa/tools/marisa-reverse-lookup.cc
|
|
166
|
+
- ext/marisa/bindings/marisa-swig.cxx
|
|
167
|
+
- ext/marisa/bindings/perl/marisa-swig.cxx
|
|
168
|
+
- ext/marisa/bindings/perl/marisa-swig_wrap.cxx
|
|
169
|
+
- ext/marisa/bindings/python/marisa-swig.cxx
|
|
170
|
+
- ext/marisa/bindings/python/marisa-swig_wrap.cxx
|
|
171
|
+
- ext/marisa/bindings/ruby/marisa-swig.cxx
|
|
172
|
+
- ext/marisa/bindings/ruby/marisa-swig_wrap.cxx
|
|
173
|
+
- ext/marisa/bindings/ruby/extconf.rb
|
|
174
|
+
homepage: http://github.com/wordtreefoundation/melisa
|
|
175
|
+
licenses:
|
|
176
|
+
- MIT
|
|
177
|
+
post_install_message:
|
|
178
|
+
rdoc_options: []
|
|
179
|
+
require_paths:
|
|
180
|
+
- lib
|
|
181
|
+
- ext
|
|
182
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
183
|
+
none: false
|
|
184
|
+
requirements:
|
|
185
|
+
- - ! '>='
|
|
186
|
+
- !ruby/object:Gem::Version
|
|
187
|
+
version: 1.9.0
|
|
188
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
189
|
+
none: false
|
|
190
|
+
requirements:
|
|
191
|
+
- - ! '>='
|
|
192
|
+
- !ruby/object:Gem::Version
|
|
193
|
+
version: '0'
|
|
194
|
+
requirements: []
|
|
195
|
+
rubyforge_project:
|
|
196
|
+
rubygems_version: 1.8.23
|
|
197
|
+
signing_key:
|
|
198
|
+
specification_version: 3
|
|
199
|
+
summary: Melisa is a Rubyesque wrapper for the Marisa Trie C library
|
|
200
|
+
test_files:
|
|
201
|
+
- spec/base_config_flags_spec.rb
|
|
202
|
+
- spec/bytes_trie_spec.rb
|
|
203
|
+
- spec/int_trie_spec.rb
|
|
204
|
+
- spec/search_spec.rb
|
|
205
|
+
- spec/spec_helper.rb
|
|
206
|
+
- spec/trie_spec.rb
|
|
207
|
+
has_rdoc:
|