linguistics 1.0.9 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/.gemtest +0 -0
- data/ChangeLog +849 -342
- data/History.rdoc +11 -0
- data/LICENSE +9 -9
- data/Manifest.txt +44 -0
- data/README.rdoc +226 -0
- data/Rakefile +32 -349
- data/examples/endocs.rb +272 -0
- data/examples/generalize_sentence.rb +2 -1
- data/examples/klingon.rb +22 -0
- data/lib/linguistics.rb +130 -292
- data/lib/linguistics/en.rb +337 -1628
- data/lib/linguistics/en/articles.rb +138 -0
- data/lib/linguistics/en/conjugation.rb +2245 -0
- data/lib/linguistics/en/conjunctions.rb +202 -0
- data/lib/linguistics/en/{infinitive.rb → infinitives.rb} +41 -55
- data/lib/linguistics/en/linkparser.rb +41 -49
- data/lib/linguistics/en/numbers.rb +483 -0
- data/lib/linguistics/en/participles.rb +33 -0
- data/lib/linguistics/en/pluralization.rb +810 -0
- data/lib/linguistics/en/stemmer.rb +75 -0
- data/lib/linguistics/en/titlecase.rb +121 -0
- data/lib/linguistics/en/wordnet.rb +63 -97
- data/lib/linguistics/inflector.rb +89 -0
- data/lib/linguistics/iso639.rb +534 -448
- data/lib/linguistics/languagebehavior.rb +36 -0
- data/lib/linguistics/monkeypatches.rb +42 -0
- data/spec/lib/constants.rb +15 -0
- data/spec/lib/helpers.rb +38 -0
- data/spec/linguistics/en/articles_spec.rb +797 -0
- data/spec/linguistics/en/conjugation_spec.rb +2083 -0
- data/spec/linguistics/en/conjunctions_spec.rb +154 -0
- data/spec/linguistics/en/infinitives_spec.rb +518 -0
- data/spec/linguistics/en/linkparser_spec.rb +66 -0
- data/spec/linguistics/en/numbers_spec.rb +1295 -0
- data/spec/linguistics/en/participles_spec.rb +55 -0
- data/spec/linguistics/en/pluralization_spec.rb +4636 -0
- data/spec/linguistics/en/stemmer_spec.rb +72 -0
- data/spec/linguistics/en/titlecase_spec.rb +841 -0
- data/spec/linguistics/en/wordnet_spec.rb +85 -0
- data/spec/linguistics/en_spec.rb +45 -167
- data/spec/linguistics/inflector_spec.rb +40 -0
- data/spec/linguistics/iso639_spec.rb +49 -53
- data/spec/linguistics/monkeypatches_spec.rb +40 -0
- data/spec/linguistics_spec.rb +46 -76
- metadata +241 -113
- metadata.gz.sig +0 -0
- data/README +0 -166
- data/README.english +0 -245
- data/rake/191_compat.rb +0 -26
- data/rake/dependencies.rb +0 -76
- data/rake/documentation.rb +0 -123
- data/rake/helpers.rb +0 -502
- data/rake/hg.rb +0 -318
- data/rake/manual.rb +0 -787
- data/rake/packaging.rb +0 -129
- data/rake/publishing.rb +0 -341
- data/rake/style.rb +0 -62
- data/rake/svn.rb +0 -668
- data/rake/testing.rb +0 -152
- data/rake/verifytask.rb +0 -64
- data/tests/en/infinitive.tests.rb +0 -207
- data/tests/en/inflect.tests.rb +0 -1389
- data/tests/en/lafcadio.tests.rb +0 -77
- data/tests/en/linkparser.tests.rb +0 -42
- data/tests/en/lprintf.tests.rb +0 -77
- data/tests/en/titlecase.tests.rb +0 -73
- data/tests/en/wordnet.tests.rb +0 -95
data/tests/en/lafcadio.tests.rb
DELETED
@@ -1,77 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
#
|
3
|
-
# Unit test for additions donated by Francis Hwang, author of Lafcadio
|
4
|
-
# $Id: lafcadio.tests.rb,v 221d313ccdd5 2007/06/13 05:25:38 ged $
|
5
|
-
#
|
6
|
-
# Converted from ts_english.rb.
|
7
|
-
#
|
8
|
-
|
9
|
-
unless defined? Linguistics::TestCase
|
10
|
-
testsdir = File::dirname( File::dirname(File::expand_path( __FILE__ )) )
|
11
|
-
$LOAD_PATH.unshift testsdir unless $LOAD_PATH.include?( testsdir )
|
12
|
-
|
13
|
-
require 'lingtestcase'
|
14
|
-
end
|
15
|
-
|
16
|
-
### This test case tests ...
|
17
|
-
class LafcadioAdditionsTestCase < Linguistics::TestCase
|
18
|
-
|
19
|
-
Linguistics::use( :en )
|
20
|
-
include Linguistics::EN
|
21
|
-
|
22
|
-
CamelCaseStrings = [
|
23
|
-
["productCategory", "product category"],
|
24
|
-
["ProductCategory", "product category"],
|
25
|
-
["catalogOrder", "catalog order"],
|
26
|
-
["product", "product"],
|
27
|
-
["theNameOfAMethod", "the name of a method"],
|
28
|
-
]
|
29
|
-
|
30
|
-
ProperNouns = {
|
31
|
-
"albania" => "Albania",
|
32
|
-
"bosnia and herzegovina" => "Bosnia and Herzegovina",
|
33
|
-
"faroe islands" => "Faroe Islands",
|
34
|
-
"macedonia, the former yugoslav republic of" =>
|
35
|
-
"Macedonia, the Former Yugoslav Republic of",
|
36
|
-
"virgin islands, u.s." => "Virgin Islands, U.S.",
|
37
|
-
}
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
#################################################################
|
42
|
-
### T E S T S
|
43
|
-
#################################################################
|
44
|
-
|
45
|
-
def test_camel_case_to_english_should_transform_to_english
|
46
|
-
printTestHeader "Lafcadio Additions: CamelCase to English"
|
47
|
-
res = nil
|
48
|
-
|
49
|
-
CamelCaseStrings.each do |src, dst|
|
50
|
-
assert_nothing_raised { res = src.en.camel_case_to_english }
|
51
|
-
assert_equal dst, res
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
### This already worked before the additions, but might as well test 'em
|
56
|
-
### some more.
|
57
|
-
def test_plural
|
58
|
-
assert_equal "product categories", "product category".en.plural
|
59
|
-
assert_equal "products", "product".en.plural
|
60
|
-
assert_equal 'addresses', 'address'.en.plural
|
61
|
-
assert_equal 'taxes', 'tax'.en.plural
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
### String#proper_noun
|
66
|
-
def test_proper_noun_should_return_caseified_string
|
67
|
-
printTestHeader "Lafcadio Additions: Proper Nouns"
|
68
|
-
|
69
|
-
ProperNouns.each do |key,expected|
|
70
|
-
input = key.dup # Get around hash keys being frozen
|
71
|
-
debug_msg "Trying %p, expect: %p" % [input, expected]
|
72
|
-
assert_equal expected, input.en.proper_noun
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
data/tests/en/linkparser.tests.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
#
|
3
|
-
# Unit test for English link grammar
|
4
|
-
# $Id: linkparser.tests.rb,v 21e0fa69b1a3 2008/09/06 05:20:07 ged $
|
5
|
-
#
|
6
|
-
# Copyright (c) 2003-2005 The FaerieMUD Consortium.
|
7
|
-
#
|
8
|
-
|
9
|
-
unless defined? Linguistics::TestCase
|
10
|
-
testsdir = File::dirname( File::dirname(File::expand_path( __FILE__ )) )
|
11
|
-
$LOAD_PATH.unshift testsdir unless $LOAD_PATH.include?( testsdir )
|
12
|
-
|
13
|
-
require 'lingtestcase'
|
14
|
-
end
|
15
|
-
|
16
|
-
### This test case tests the English language link grammar extension of
|
17
|
-
### Linguistics::EN.
|
18
|
-
class LinkParserTestCase < Linguistics::TestCase
|
19
|
-
|
20
|
-
Linguistics::use(:en)
|
21
|
-
include Linguistics::EN
|
22
|
-
|
23
|
-
### Overridden to skip tests if WordNet isn't installed.
|
24
|
-
def run( result )
|
25
|
-
return super if Linguistics::EN::has_link_parser?
|
26
|
-
yield( STARTED, name )
|
27
|
-
result.add_run
|
28
|
-
yield( FINISHED, name )
|
29
|
-
end
|
30
|
-
|
31
|
-
|
32
|
-
def test_sentence_should_return_a_parsed_linkparser_sentence
|
33
|
-
rval = nil
|
34
|
-
|
35
|
-
assert_nothing_raised do
|
36
|
-
rval = "He is a dog.".en.sentence
|
37
|
-
end
|
38
|
-
|
39
|
-
assert_instance_of LinkParser::Sentence, rval
|
40
|
-
end
|
41
|
-
|
42
|
-
end
|
data/tests/en/lprintf.tests.rb
DELETED
@@ -1,77 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
#
|
3
|
-
# Unit test for Linguistics::EN#lprintf
|
4
|
-
# $Id: lprintf.tests.rb,v 221d313ccdd5 2007/06/13 05:25:38 ged $
|
5
|
-
#
|
6
|
-
# Copyright (c) 2006 The FaerieMUD Consortium.
|
7
|
-
#
|
8
|
-
|
9
|
-
unless defined?( Linguistics::TestCase )
|
10
|
-
require 'pathname'
|
11
|
-
basedir = Pathname.new( __FILE__ ).dirname.parent.parent.expand_path
|
12
|
-
|
13
|
-
libdir = basedir + "lib"
|
14
|
-
testsdir = basedir + "tests"
|
15
|
-
$LOAD_PATH.unshift( libdir ) unless $LOAD_PATH.include?( libdir )
|
16
|
-
$LOAD_PATH.unshift( testsdir ) unless $LOAD_PATH.include?( testsdir )
|
17
|
-
|
18
|
-
require 'lingtestcase'
|
19
|
-
end
|
20
|
-
|
21
|
-
require 'linguistics/en'
|
22
|
-
|
23
|
-
### This test case tests the lprintf method of the Linguistics English module
|
24
|
-
class Linguistics::LPrintfTestCase < Linguistics::TestCase
|
25
|
-
|
26
|
-
Items = %w{ruby moose mouse nexus}
|
27
|
-
|
28
|
-
def initialize( *args )
|
29
|
-
Linguistics::use( :en )
|
30
|
-
super
|
31
|
-
end
|
32
|
-
|
33
|
-
def test_lprintf_with_conjunct_tag_should_conjunctionize_the_corresponding_argument
|
34
|
-
rval = nil
|
35
|
-
|
36
|
-
assert_nothing_raised do
|
37
|
-
rval = "I have %CONJUNCT in my pocket".en.lprintf( Items )
|
38
|
-
end
|
39
|
-
|
40
|
-
assert_equal "I have a ruby, a moose, a mouse, and a nexus in my pocket",
|
41
|
-
rval
|
42
|
-
end
|
43
|
-
|
44
|
-
|
45
|
-
def test_lprintf_with_plural_tag_should_pluralize_the_corresponding_argument
|
46
|
-
rval = nil
|
47
|
-
|
48
|
-
assert_nothing_raised do
|
49
|
-
rval = "What's with all the %PL?".en.lprintf( "llama" )
|
50
|
-
end
|
51
|
-
|
52
|
-
assert_equal "What's with all the llamas?", rval
|
53
|
-
end
|
54
|
-
|
55
|
-
|
56
|
-
def test_lprintf_with_indef_article_tag_should_use_an_for_umbrella
|
57
|
-
rval = nil
|
58
|
-
|
59
|
-
assert_nothing_raised do
|
60
|
-
rval = "You pick up %A.".en.lprintf( "umbrella" )
|
61
|
-
end
|
62
|
-
|
63
|
-
assert_equal "You pick up an umbrella.", rval
|
64
|
-
end
|
65
|
-
|
66
|
-
|
67
|
-
def test_lprintf_with_indef_article_tag_should_use_a_for_flagon_of_mead
|
68
|
-
rval = nil
|
69
|
-
|
70
|
-
assert_nothing_raised do
|
71
|
-
rval = "You pick up %A.".en.lprintf( "flagon of mead" )
|
72
|
-
end
|
73
|
-
|
74
|
-
assert_equal "You pick up a flagon of mead.", rval
|
75
|
-
end
|
76
|
-
|
77
|
-
end
|
data/tests/en/titlecase.tests.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
#
|
3
|
-
# Unit test for English language module's title case method
|
4
|
-
# $Id: titlecase.tests.rb,v 221d313ccdd5 2007/06/13 05:25:38 ged $
|
5
|
-
#
|
6
|
-
# Copyright (c) 2005 The FaerieMUD Consortium.
|
7
|
-
#
|
8
|
-
|
9
|
-
unless defined? Linguistics::TestCase
|
10
|
-
testsdir = File::dirname( File::dirname(File::expand_path( __FILE__ )) )
|
11
|
-
$LOAD_PATH.unshift testsdir unless $LOAD_PATH.include?( testsdir )
|
12
|
-
|
13
|
-
require 'lingtestcase'
|
14
|
-
end
|
15
|
-
|
16
|
-
### This test case tests the #titlecase method of the english-language extension
|
17
|
-
### for the Linguistics library.
|
18
|
-
class TitleCaseTestCase < Linguistics::TestCase
|
19
|
-
|
20
|
-
Linguistics::use( :en )
|
21
|
-
|
22
|
-
Titles = File::open(__FILE__).read.split("__END__").last.split("\n")
|
23
|
-
|
24
|
-
def test_nothing
|
25
|
-
debug_msg "Not tested, as the functionality it tests hasn't been released yet."
|
26
|
-
end
|
27
|
-
|
28
|
-
def dont_test_titles
|
29
|
-
printTestHeader "TitleCase: Titles"
|
30
|
-
rval = nil
|
31
|
-
|
32
|
-
debug_msg "Titles = %p" % [Titles]
|
33
|
-
|
34
|
-
Titles.each do |title|
|
35
|
-
next if !/\w/.match( title )
|
36
|
-
|
37
|
-
assert_nothing_raised do
|
38
|
-
rval = title.downcase.en.titlecase
|
39
|
-
end
|
40
|
-
|
41
|
-
assert_instance_of String, rval
|
42
|
-
assert_equal title, rval
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
end
|
47
|
-
|
48
|
-
|
49
|
-
__END__
|
50
|
-
A Certain Kind of Longing
|
51
|
-
A Connecticut Yankee in King Arthur's Court
|
52
|
-
A Portrait of the Artist as a Young Man
|
53
|
-
Alice in Wonderland
|
54
|
-
Catcher in the Rye
|
55
|
-
Crime and Punishment
|
56
|
-
Death of a Salesman
|
57
|
-
Dr. Jekyll and Mr. Hyde
|
58
|
-
Gone With the Wind
|
59
|
-
Gone but Not Forgotten
|
60
|
-
Gulliver's Travels
|
61
|
-
Last of the Mohicans
|
62
|
-
Lord of the Flies
|
63
|
-
Love in the Time of Cholera
|
64
|
-
Maggie, A Girl of the Streets
|
65
|
-
Notes from the Underground
|
66
|
-
Of Mice and Men
|
67
|
-
Pride and Prejudice
|
68
|
-
The Adventures of Don Quixote
|
69
|
-
The Good Earth
|
70
|
-
The Heart of Darkness
|
71
|
-
The Lord of the Rings
|
72
|
-
The Old Man and the Sea
|
73
|
-
Mrs. Frisby and the Rats of N.I.M.H.
|
data/tests/en/wordnet.tests.rb
DELETED
@@ -1,95 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby -w
|
2
|
-
#
|
3
|
-
# Unit test for Linguistics::EN WordNet functions
|
4
|
-
# $Id: wordnet.tests.rb,v 21e0fa69b1a3 2008/09/06 05:20:07 ged $
|
5
|
-
#
|
6
|
-
# Copyright (c) 2003 The FaerieMUD Consortium.
|
7
|
-
#
|
8
|
-
|
9
|
-
unless defined? Linguistics::TestCase
|
10
|
-
testsdir = File::dirname( File::dirname(File::expand_path( __FILE__ )) )
|
11
|
-
$LOAD_PATH.unshift testsdir unless $LOAD_PATH.include?( testsdir )
|
12
|
-
|
13
|
-
require 'lingtestcase'
|
14
|
-
end
|
15
|
-
|
16
|
-
|
17
|
-
### This test case tests ...
|
18
|
-
class EnglishWordnetTestCase < Linguistics::TestCase
|
19
|
-
|
20
|
-
Linguistics::use( :en, :installProxy => true )
|
21
|
-
|
22
|
-
TestObjects = [ "auto", 5, [%w{ash bin}] ]
|
23
|
-
|
24
|
-
### Overridden to skip tests if WordNet isn't installed.
|
25
|
-
def run( result )
|
26
|
-
return super if Linguistics::EN::has_wordnet?
|
27
|
-
yield( STARTED, name )
|
28
|
-
result.add_run
|
29
|
-
yield( FINISHED, name )
|
30
|
-
end
|
31
|
-
|
32
|
-
|
33
|
-
#################################################################
|
34
|
-
### T E S T S
|
35
|
-
#################################################################
|
36
|
-
|
37
|
-
### Test the wn_lexicon method of the EN module
|
38
|
-
def test_00_Lexicon
|
39
|
-
printTestHeader "English: WordNet: Lexicon"
|
40
|
-
|
41
|
-
assert_respond_to Linguistics::EN, :wn_lexicon
|
42
|
-
assert_nothing_raised {
|
43
|
-
lex = Linguistics::EN::wn_lexicon
|
44
|
-
assert_instance_of WordNet::Lexicon, lex
|
45
|
-
}
|
46
|
-
end
|
47
|
-
|
48
|
-
### Test synset method
|
49
|
-
def test_05_Synset
|
50
|
-
printTestHeader "English: WordNet: Synsets"
|
51
|
-
rval = nil
|
52
|
-
|
53
|
-
TestObjects.each do |obj|
|
54
|
-
assert_respond_to obj, :en
|
55
|
-
assert_respond_to obj.en, :synset
|
56
|
-
|
57
|
-
assert_nothing_raised { rval = obj.en.synset }
|
58
|
-
assert_instance_of WordNet::Synset, rval,
|
59
|
-
".en.synset for %p" % obj
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
### Test proxy method
|
64
|
-
def test_06_ProxyMethod
|
65
|
-
printTestHeader "English: WordNet: Proxy method"
|
66
|
-
rval = nil
|
67
|
-
|
68
|
-
TestObjects.each do |obj|
|
69
|
-
assert_nothing_raised { rval = obj.synset }
|
70
|
-
assert_instance_of WordNet::Synset, rval,
|
71
|
-
".synset for %p" % obj
|
72
|
-
end
|
73
|
-
|
74
|
-
self.class.addSetupBlock {
|
75
|
-
@obj = TestObjects[0]
|
76
|
-
}
|
77
|
-
self.class.addTeardownBlock {
|
78
|
-
@obj = nil
|
79
|
-
}
|
80
|
-
end
|
81
|
-
|
82
|
-
### Test #coordinates
|
83
|
-
def test_10_Coordinates
|
84
|
-
printTestHeader "English: WordNet: Coordinate terms"
|
85
|
-
rval = nil
|
86
|
-
|
87
|
-
assert_nothing_raised {
|
88
|
-
rval = @obj.coordinates
|
89
|
-
}
|
90
|
-
assert_instance_of Array, rval
|
91
|
-
assert_instance_of WordNet::Synset, rval.first
|
92
|
-
end
|
93
|
-
|
94
|
-
end
|
95
|
-
|