treat 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +1 -0
- data/README +3 -0
- data/TODO +14 -26
- data/bin/INFO +1 -1
- data/lib/treat/buildable.rb +10 -11
- data/lib/treat/categories.rb +8 -6
- data/lib/treat/category.rb +7 -2
- data/lib/treat/delegatable.rb +64 -56
- data/lib/treat/detectors/encoding/r_chardet19.rb +1 -1
- data/lib/treat/detectors/language/language_detector.rb +2 -1
- data/lib/treat/detectors/language/what_language.rb +2 -2
- data/lib/treat/detectors.rb +3 -0
- data/lib/treat/entities/entity.rb +1 -1
- data/lib/treat/entities.rb +9 -10
- data/lib/treat/exception.rb +3 -1
- data/lib/treat/extractors/named_entity/abner.rb +1 -1
- data/lib/treat/extractors/named_entity/stanford.rb +2 -2
- data/lib/treat/extractors/time/chronic.rb +2 -2
- data/lib/treat/extractors/time/nickel.rb +2 -2
- data/lib/treat/extractors/topic_words/lda.rb +2 -2
- data/lib/treat/extractors.rb +12 -9
- data/lib/treat/feature.rb +6 -1
- data/lib/treat/formatters/cleaners/html.rb +1 -1
- data/lib/treat/formatters.rb +8 -8
- data/lib/treat/group.rb +11 -10
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
- data/lib/treat/inflectors/{conjugators → conjugations}/linguistics.rb +6 -6
- data/lib/treat/inflectors/{declensors → declensions}/en.rb +2 -2
- data/lib/treat/inflectors/{declensors → declensions}/linguistics.rb +5 -5
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +4 -4
- data/lib/treat/inflectors/{stemmers → stem}/porter.rb +1 -1
- data/lib/treat/inflectors/{stemmers → stem}/porter_c.rb +3 -3
- data/lib/treat/inflectors/{stemmers → stem}/uea.rb +3 -3
- data/lib/treat/inflectors.rb +8 -21
- data/lib/treat/kernel.rb +120 -0
- data/lib/treat/languages/arabic.rb +14 -0
- data/lib/treat/languages/categories.rb +5 -0
- data/lib/treat/languages/chinese.rb +12 -0
- data/lib/treat/languages/english/categories.rb +23 -0
- data/lib/treat/{resources → languages/english}/tags.rb +127 -184
- data/lib/treat/languages/english.rb +33 -0
- data/lib/treat/languages/french.rb +17 -0
- data/lib/treat/languages/german.rb +17 -0
- data/lib/treat/languages/italian.rb +14 -0
- data/lib/treat/{resources/languages.txt → languages/list.txt} +0 -0
- data/lib/treat/languages/xinhua.rb +12 -0
- data/lib/treat/languages.rb +91 -0
- data/lib/treat/lexicalizers/category/from_tag.rb +20 -8
- data/lib/treat/lexicalizers/synsets/rita_wn.rb +1 -1
- data/lib/treat/lexicalizers/tag/brill.rb +2 -1
- data/lib/treat/lexicalizers/tag/lingua.rb +2 -1
- data/lib/treat/lexicalizers/tag/stanford.rb +16 -15
- data/lib/treat/lexicalizers.rb +1 -1
- data/lib/treat/object.rb +6 -0
- data/lib/treat/processors/parsers/enju.rb +3 -2
- data/lib/treat/processors/parsers/stanford.rb +15 -12
- data/lib/treat/processors/segmenters/punkt.rb +1 -1
- data/lib/treat/processors/segmenters/stanford.rb +7 -5
- data/lib/treat/processors/segmenters/tactful.rb +1 -1
- data/lib/treat/processors/tokenizers/multilingual.rb +2 -2
- data/lib/treat/processors/tokenizers/stanford.rb +7 -5
- data/lib/treat/visitable.rb +2 -1
- data/lib/treat.rb +105 -54
- data/test/tc_entity.rb +5 -0
- data/test/tc_resources.rb +5 -5
- data/test/tc_treat.rb +1 -2
- data/test/tests.rb +2 -1
- metadata +63 -64
- data/lib/treat/formatters/serializers/yaml/helper.rb +0 -96
- data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +0 -213
- data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +0 -68
- data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +0 -6
- data/lib/treat/inflectors/lemmatizers/e_lemma.rb +0 -12
- data/lib/treat/resources/categories.rb +0 -18
- data/lib/treat/resources/delegates.rb +0 -96
- data/lib/treat/resources/dependencies.rb +0 -0
- data/lib/treat/resources/edges.rb +0 -8
- data/lib/treat/resources/formats.rb +0 -23
- data/lib/treat/resources/languages.rb +0 -86
- data/lib/treat/resources.rb +0 -10
- data/lib/treat/utilities.rb +0 -127
@@ -1,68 +0,0 @@
|
|
1
|
-
#include "wn.h"
|
2
|
-
#include "wnconsts.h"
|
3
|
-
#include "ruby.h"
|
4
|
-
|
5
|
-
/*
|
6
|
-
|
7
|
-
Copyright (C) 2004 UTIYAMA Masao <mutiyama@crl.go.jp>
|
8
|
-
|
9
|
-
This program is free software; you can redistribute it and/or modify
|
10
|
-
it under the terms of the GNU General Public License as published by
|
11
|
-
the Free Software Foundation; either version 2 of the License, or
|
12
|
-
(at your option) any later version.
|
13
|
-
|
14
|
-
This program is distributed in the hope that it will be useful,
|
15
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
GNU General Public License for more details.
|
18
|
-
|
19
|
-
You should have received a copy of the GNU General Public License
|
20
|
-
along with this program; if not, write to the Free Software
|
21
|
-
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
22
|
-
*/
|
23
|
-
|
24
|
-
static VALUE
|
25
|
-
parse(VALUE klass, VALUE rb_word, VALUE rb_pos)
|
26
|
-
{
|
27
|
-
char *word;
|
28
|
-
char *POS = STR2CSTR(rb_pos);
|
29
|
-
char *lemma;
|
30
|
-
int pos;
|
31
|
-
VALUE retval = rb_ary_new();
|
32
|
-
|
33
|
-
word = malloc(strlen(STR2CSTR(rb_word))+1);
|
34
|
-
if(!word){rb_raise(rb_eStandardError, "malloc failed.\n");}
|
35
|
-
strcpy(word, STR2CSTR(rb_word));
|
36
|
-
|
37
|
-
if(strcmp(POS,"noun")==0){pos = NOUN;}
|
38
|
-
else if(strcmp(POS,"verb")==0){pos = VERB;}
|
39
|
-
else if(strcmp(POS,"adj")==0){pos = ADJ;}
|
40
|
-
else if(strcmp(POS,"adv")==0){pos = ADV;}
|
41
|
-
else{
|
42
|
-
rb_raise(rb_eStandardError, "%s should be (noun|verb|adj|adv)\n", POS);
|
43
|
-
}
|
44
|
-
if(is_defined(word, pos)){
|
45
|
-
/*printf("* %s found as is.\n", word);*/
|
46
|
-
rb_ary_push(retval, rb_str_new2(word));
|
47
|
-
}
|
48
|
-
if((lemma=morphstr(word, pos))!=NULL){
|
49
|
-
do {
|
50
|
-
if(is_defined(lemma, pos)){
|
51
|
-
/*printf("* %s => %s found.\n", word, lemma);*/
|
52
|
-
rb_ary_push(retval, rb_str_new2(lemma));
|
53
|
-
}
|
54
|
-
} while((lemma=morphstr(NULL, pos))!=NULL);
|
55
|
-
}
|
56
|
-
free(word);
|
57
|
-
return retval;
|
58
|
-
}
|
59
|
-
|
60
|
-
void
|
61
|
-
Init_elemma()
|
62
|
-
{
|
63
|
-
VALUE mod = rb_define_module("ELemma");
|
64
|
-
rb_define_module_function(mod, "parse", parse, 2);
|
65
|
-
if(wninit()){
|
66
|
-
rb_raise(rb_eStandardError, "Cannot open WordNet database\n");
|
67
|
-
}
|
68
|
-
}
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Resources
|
3
|
-
class Categories
|
4
|
-
List = [
|
5
|
-
:adjective, :adverb, :noun, :verb, :interjection,
|
6
|
-
:clitic, :coverb, :conjunction, :determiner, :particle,
|
7
|
-
:preposition, :pronoun, :number, :symbol, :punctuation,
|
8
|
-
:complementizer
|
9
|
-
]
|
10
|
-
wttc = {}
|
11
|
-
Treat::Resources::Tags::AlignedWordTags.each_slice(2) do |desc, tags|
|
12
|
-
desc = desc.gsub(',', ' ,').split(' ')[0].downcase
|
13
|
-
tags.each { |tag| wttc[tag] = desc.intern }
|
14
|
-
end
|
15
|
-
WordTagToCategory = wttc
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
@@ -1,96 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Resources
|
3
|
-
module Delegates
|
4
|
-
class English
|
5
|
-
Extractors = {
|
6
|
-
time: [:chronic],
|
7
|
-
topics: [:reuters],
|
8
|
-
topic_words: [:lda],
|
9
|
-
key_sentences: [:topics_frequency]
|
10
|
-
}
|
11
|
-
Processors = {
|
12
|
-
chunkers: [:txt],
|
13
|
-
parsers: [:enju, :stanford],
|
14
|
-
segmenters: [:tactful, :punkt, :stanford],
|
15
|
-
tokenizers: [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
|
16
|
-
}
|
17
|
-
Lexicalizers = {
|
18
|
-
category: [:from_tag],
|
19
|
-
linkages: [:naive],
|
20
|
-
synsets: [:wordnet, :rita_wn],
|
21
|
-
tag: [:brill, :lingua, :stanford]
|
22
|
-
}
|
23
|
-
Inflectors = {
|
24
|
-
conjugators: [:linguistics],
|
25
|
-
declensors: [:linguistics, :english],
|
26
|
-
lemmatizers: [:e_lemma],
|
27
|
-
stemmers: [:porter_c, :porter, :uea],
|
28
|
-
ordinal_words: [:linguistics],
|
29
|
-
cardinal_words: [:linguistics]
|
30
|
-
}
|
31
|
-
end
|
32
|
-
class German
|
33
|
-
Extractors = {}
|
34
|
-
Inflectors = {}
|
35
|
-
Lexicalizers = {
|
36
|
-
tag: [:stanford]
|
37
|
-
}
|
38
|
-
Processors = {
|
39
|
-
chunkers: [:txt],
|
40
|
-
parsers: [:stanford],
|
41
|
-
segmenters: [:tactful, :punkt, :stanford],
|
42
|
-
tokenizers: [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
|
43
|
-
}
|
44
|
-
end
|
45
|
-
class French
|
46
|
-
Extractors = {}
|
47
|
-
Inflectors = {}
|
48
|
-
Lexicalizers = {
|
49
|
-
tag: [:stanford]
|
50
|
-
}
|
51
|
-
Processors = {
|
52
|
-
chunkers: [:txt],
|
53
|
-
parsers: [:stanford],
|
54
|
-
segmenters: [:tactful, :punkt, :stanford],
|
55
|
-
tokenizers: [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
|
56
|
-
}
|
57
|
-
end
|
58
|
-
class Italian
|
59
|
-
Extractors = {}
|
60
|
-
Inflectors = {}
|
61
|
-
Lexicalizers = {}
|
62
|
-
Processors = {
|
63
|
-
chunkers: [:txt],
|
64
|
-
segmenters: [:tactful, :punkt, :stanford],
|
65
|
-
tokenizers: [:multilingual, :macintyre, :perl, :punkt, :tactful, :stanford]
|
66
|
-
}
|
67
|
-
end
|
68
|
-
class Arabic
|
69
|
-
Extractors = {}
|
70
|
-
Inflectors = {}
|
71
|
-
Lexicalizers = {
|
72
|
-
tag: [:stanford]
|
73
|
-
}
|
74
|
-
Processors = {
|
75
|
-
parsers: [:stanford]
|
76
|
-
}
|
77
|
-
end
|
78
|
-
class Chinese
|
79
|
-
Extractors = {}
|
80
|
-
Inflectors = {}
|
81
|
-
Lexicalizers = {
|
82
|
-
tag: [:stanford]
|
83
|
-
}
|
84
|
-
Processors = {}
|
85
|
-
end
|
86
|
-
class Xinhua
|
87
|
-
Extractors = {}
|
88
|
-
Inflectors = {}
|
89
|
-
Lexicalizers = {}
|
90
|
-
Processors = {
|
91
|
-
parsers: [:stanford]
|
92
|
-
}
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
File without changes
|
@@ -1,8 +0,0 @@
|
|
1
|
-
# http://nlp.stanford.edu/software/dependencies_manual.pdf
|
2
|
-
|
3
|
-
=begin
|
4
|
-
ENJU
|
5
|
-
|
6
|
-
pred: noun_arg0, noun_arg1, noun_arg2, noun_arg12, it_arg1, there_arg0, quote_arg2, quote_arg12, quote_arg23, quote_arg123, poss_arg2, poss_arg12, aux_arg12, aux_mod_arg12, verb_arg1, verb_arg12, verb_arg123, verb_arg1234, verb_mod_arg1, verb_mod_arg12, verb_mod_arg123, verb_mod_arg1234, adj_arg1, adj_arg12, adj_mod_arg1, adj_mod_arg12, conj_arg1, conj_arg12, conj_arg123, coord_arg12, det_arg1, prep_arg12, prep_arg123, prep_mod_arg12, prep_mod_arg123, lgs_arg2, dtv_arg2, punct_arg1, app_arg12, lparen_arg123, rparen_arg0, comp_arg1, comp_arg12, comp_mod_arg1, relative_arg1, relative_arg12
|
7
|
-
|
8
|
-
=end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Resources
|
3
|
-
module Format
|
4
|
-
|
5
|
-
class XML
|
6
|
-
require 'nokogiri'
|
7
|
-
def self.validate(document_path, schema_path, root_element)
|
8
|
-
schema = Nokogiri::XML::Schema(File.read(schema_path))
|
9
|
-
document = Nokogiri::XML(File.read(document_path))
|
10
|
-
schema.validate(document.xpath("//#{root_element}").to_s)
|
11
|
-
end
|
12
|
-
validate('input.xml', 'schema.xdf', 'container').each do |error|
|
13
|
-
puts error.message
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
class HTML < XML
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
@@ -1,86 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Resources
|
3
|
-
# Dictionary of ISO-639-1, ISO-639-2 language codes,
|
4
|
-
# as well as their full text description in both
|
5
|
-
# English and French.
|
6
|
-
module Languages
|
7
|
-
ISO639_1 = 1
|
8
|
-
ISO639_2 = 2
|
9
|
-
# Describe a language code (ISO-639-1 or ISO-639-2)
|
10
|
-
# or its full text description in full French or English.
|
11
|
-
def self.describe(lang, desc_lang = :en)
|
12
|
-
raise "Must provide a non-nil language identifier to describe." if lang.nil?
|
13
|
-
lang = find(lang).to_s
|
14
|
-
if [:en, :eng, :english, :anglais].include?(desc_lang)
|
15
|
-
l = @@english_full.key(lang)
|
16
|
-
elsif [:fr, :fra, :french, :french].include?(desc_lang)
|
17
|
-
l = @@french_full.key(lang)
|
18
|
-
else
|
19
|
-
raise Treat::Exception,
|
20
|
-
"Unknown language to describe: #{desc_lang}."
|
21
|
-
end
|
22
|
-
not_found(lang) if l.nil?
|
23
|
-
l.intern
|
24
|
-
end
|
25
|
-
# Raise an error message when a language code
|
26
|
-
# or description is not found and suggest
|
27
|
-
# possible misspellings.
|
28
|
-
def self.not_found(lang)
|
29
|
-
msg = "Language '#{lang}' does not exist."
|
30
|
-
all = @@iso639_2.keys + @@iso639_1.keys +
|
31
|
-
@@english_full.keys + @@french_full.keys
|
32
|
-
msg += did_you_mean?(all, lang)
|
33
|
-
raise Treat::Exception, msg
|
34
|
-
end
|
35
|
-
# Find a language by ISO-639-1 or ISO-639-2 code
|
36
|
-
# or full name (in English or French) and return
|
37
|
-
# the ISO-639-1 or ISO-639-2 language code as a
|
38
|
-
# lowercase identifier.
|
39
|
-
def self.find(lang, rc = ISO639_2)
|
40
|
-
raise "Must provide a non-nil language identifier to describe." if lang.nil?
|
41
|
-
get_languages
|
42
|
-
lang = lang.to_s.downcase
|
43
|
-
if @@iso639_1.has_key?(lang)
|
44
|
-
return :"#{lang}" if rc == ISO639_1
|
45
|
-
return :"#{@@iso639_1[lang]}" if rc == ISO639_2
|
46
|
-
elsif @@iso639_2.has_key?(lang)
|
47
|
-
return :"#{lang}" if rc == ISO639_2
|
48
|
-
return :"#{@@iso639_2[lang]}" if rc == ISO639_1
|
49
|
-
elsif @@english_full.has_key?(lang)
|
50
|
-
return :"#{@@english_full[lang]}" if rc == ISO639_2
|
51
|
-
return :"#{@@iso639_2[@@english_full[lang]]}" if rc == ISO639_1
|
52
|
-
elsif @@french_full.has_key?(lang)
|
53
|
-
return :"#{@@french_full[lang]}" if rc == ISO639_2
|
54
|
-
return :"#{@@iso639_1[@@french_full[lang]]}" if rc == ISO639_2
|
55
|
-
else
|
56
|
-
not_found(lang)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
@@loaded = false
|
60
|
-
# Get the languages from the dictionary.
|
61
|
-
def self.get_languages
|
62
|
-
return if @@loaded
|
63
|
-
@@iso639_1 = {}; @@iso639_2 = {};
|
64
|
-
@@english_full = {}; @@french_full = {}
|
65
|
-
languages = IO.readlines(Treat.lib + '/treat/resources/languages.txt')
|
66
|
-
languages.each do |language|
|
67
|
-
iso639_2, iso639_1, english_desc, french_desc =
|
68
|
-
language.split(',')
|
69
|
-
@@iso639_1[iso639_1] = iso639_2
|
70
|
-
@@iso639_2[iso639_2] = iso639_1
|
71
|
-
unless english_desc.nil?
|
72
|
-
english_desc.strip.downcase.split('|').each do |l|
|
73
|
-
@@english_full[l.downcase.strip] = iso639_2
|
74
|
-
end
|
75
|
-
end
|
76
|
-
unless french_desc.nil?
|
77
|
-
french_desc.strip.downcase.split('|').each do |l|
|
78
|
-
@@french_full[l.downcase.strip] = iso639_2
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
82
|
-
@@loaded = true
|
83
|
-
end
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
data/lib/treat/resources.rb
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
module Resources
|
3
|
-
require 'treat/resources/delegates'
|
4
|
-
require 'treat/resources/dependencies'
|
5
|
-
require 'treat/resources/edges'
|
6
|
-
require 'treat/resources/languages'
|
7
|
-
require 'treat/resources/tags'
|
8
|
-
require 'treat/resources/categories'
|
9
|
-
end
|
10
|
-
end
|
data/lib/treat/utilities.rb
DELETED
@@ -1,127 +0,0 @@
|
|
1
|
-
module Treat
|
2
|
-
# Provides utility functions used across the library.
|
3
|
-
module Utilities
|
4
|
-
# Require file utilities.
|
5
|
-
require 'fileutils'
|
6
|
-
# Returns the platform we are running on.
|
7
|
-
def self.platform
|
8
|
-
RUBY_PLATFORM.split("-")[1]
|
9
|
-
end
|
10
|
-
# Runs a block of code silently, i.e. without
|
11
|
-
# expressing warnings even in verbose mode.
|
12
|
-
# TODO: rename to silence_warnings.
|
13
|
-
def self.silently(&block)
|
14
|
-
warn_level = $VERBOSE
|
15
|
-
$VERBOSE = nil
|
16
|
-
result = block.call
|
17
|
-
$VERBOSE = warn_level
|
18
|
-
result
|
19
|
-
end
|
20
|
-
def self.silence_streams(*streams)
|
21
|
-
yield
|
22
|
-
end
|
23
|
-
# Create a temporary file which is deleted
|
24
|
-
# after execution of the block.
|
25
|
-
require 'tempfile'
|
26
|
-
def self.create_temp_file(ext, value = nil, &block)
|
27
|
-
tmp = Tempfile.new(['', ".#{ext.to_s}"], Treat.tmp)
|
28
|
-
tmp.puts(value) if value
|
29
|
-
block.call(tmp.path)
|
30
|
-
end
|
31
|
-
# A list of acronyms used in class names within
|
32
|
-
# the program. These do not CamelCase; they
|
33
|
-
# CAMELCASE.
|
34
|
-
@@acronyms = ['XML', 'HTML', 'YAML', 'UEA', 'LDA', 'PDF', 'GOCR', 'Treat'].join('|')
|
35
|
-
@@cc_cache = {}
|
36
|
-
# Convert un_camel_case to CamelCase.
|
37
|
-
def self.camel_case(o_phrase)
|
38
|
-
phrase = o_phrase.to_s.dup
|
39
|
-
return @@cc_cache[o_phrase] if @@cc_cache[o_phrase]
|
40
|
-
phrase.gsub!(/#{@@acronyms.downcase}[^a-z]+/) { |a| a.upcase }
|
41
|
-
phrase.gsub!(/^[a-z]|_[a-z]/) { |a| a.upcase }
|
42
|
-
phrase.gsub!('_', '')
|
43
|
-
@@cc_cache[o_phrase] = phrase
|
44
|
-
phrase
|
45
|
-
end
|
46
|
-
@@ucc_cache = {}
|
47
|
-
# Convert CamelCase to un_camel_case.
|
48
|
-
def self.un_camel_case(o_phrase)
|
49
|
-
phrase = o_phrase.to_s.dup
|
50
|
-
return @@ucc_cache[o_phrase] if @@ucc_cache[o_phrase]
|
51
|
-
phrase.gsub!(/#{@@acronyms}/) { |a| a.downcase.capitalize }
|
52
|
-
phrase.gsub!(/[A-Z]/) { |p| '_' + p.downcase }
|
53
|
-
phrase = phrase[1..-1] if phrase[0] == '_'
|
54
|
-
@@ucc_cache[o_phrase] = phrase
|
55
|
-
phrase
|
56
|
-
end
|
57
|
-
# Return the Levenshtein distance between two strings,
|
58
|
-
# taking into account the costs of insertion, deletion,
|
59
|
-
# and substitution. Stolen from:
|
60
|
-
# http://ruby-snippets.heroku.com/string/levenshtein-distance
|
61
|
-
def self.levenshtein(first, other, ins=1, del=1, sub=1)
|
62
|
-
return nil if first.nil? || other.nil?
|
63
|
-
dm = []
|
64
|
-
dm[0] = (0..first.length).collect { |i| i * ins}
|
65
|
-
fill = [0] * (first.length - 1)
|
66
|
-
for i in 1..other.length
|
67
|
-
dm[i] = [i * del, fill.flatten]
|
68
|
-
end
|
69
|
-
for i in 1..other.length
|
70
|
-
for j in 1..first.length
|
71
|
-
dm[i][j] = [
|
72
|
-
dm[i-1][j-1] + (first[i-1] == other[i-1] ? 0 : sub),
|
73
|
-
dm[i][j-1] + ins,
|
74
|
-
dm[i-1][j] + del
|
75
|
-
].min
|
76
|
-
end
|
77
|
-
end
|
78
|
-
dm[other.length][first.length]
|
79
|
-
end
|
80
|
-
# Search the list to see if there are words
|
81
|
-
# similar to name. If yes, return a string
|
82
|
-
# saying "Did you mean ... ?"
|
83
|
-
def self.did_you_mean?(list, name)
|
84
|
-
msg = ''
|
85
|
-
sugg = []
|
86
|
-
list.each do |element|
|
87
|
-
l = levenshtein(element,name)
|
88
|
-
if l > 0 && l < 2
|
89
|
-
sugg << element
|
90
|
-
end
|
91
|
-
end
|
92
|
-
unless sugg.empty?
|
93
|
-
if sugg.size == 1
|
94
|
-
msg += " Perhaps you meant '#{sugg[0]}' ?"
|
95
|
-
else
|
96
|
-
sugg_quote = sugg[0..-2].map {|x| '\'' + x + '\''}
|
97
|
-
msg += " Perhaps you meant #{sugg_quote.join(', ')}," +
|
98
|
-
" or '#{sugg[-1]}' ?"
|
99
|
-
end
|
100
|
-
end
|
101
|
-
msg
|
102
|
-
end
|
103
|
-
def self.caller_method(n = 3)
|
104
|
-
at = caller(n).first
|
105
|
-
/^(.+?):(\d+)(?::in `(.*)')?/ =~ at
|
106
|
-
:"#{Regexp.last_match[3]}"
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
# Make undefining constants publicly
|
112
|
-
# available on any object.
|
113
|
-
Object.module_eval do
|
114
|
-
def self.const_unset(const); Object.instance_eval { remove_const(const) }; puts const; end
|
115
|
-
end
|
116
|
-
|
117
|
-
# Make the most common utility functions available in the global scope.
|
118
|
-
def create_temp_file(ext, value = nil, &block)
|
119
|
-
Treat::Utilities.create_temp_file(ext, value) { |f| block.call(f) }
|
120
|
-
end
|
121
|
-
def silence_streams(*streams); Treat::Utilities.silence_streams(*streams) { yield }; end
|
122
|
-
def silently(&block); Treat::Utilities.silently { block.call }; end
|
123
|
-
def cc(w); Treat::Utilities.camel_case(w); end
|
124
|
-
def ucc(w); Treat::Utilities.un_camel_case(w); end
|
125
|
-
def cl(n); n.to_s.split('::')[-1]; end
|
126
|
-
def did_you_mean?(l, e); Treat::Utilities.did_you_mean?(l, e); end
|
127
|
-
def caller_method(n = 3); Treat::Utilities.caller_method(n); end
|