linguistics 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +640 -0
- data/LICENSE +27 -0
- data/README +166 -0
- data/README.english +245 -0
- data/Rakefile +338 -0
- data/examples/generalize_sentence.rb +46 -0
- data/lib/linguistics.rb +366 -0
- data/lib/linguistics/en.rb +1728 -0
- data/lib/linguistics/en/infinitive.rb +1145 -0
- data/lib/linguistics/en/linkparser.rb +109 -0
- data/lib/linguistics/en/wordnet.rb +257 -0
- data/lib/linguistics/iso639.rb +461 -0
- data/rake/191_compat.rb +26 -0
- data/rake/dependencies.rb +76 -0
- data/rake/helpers.rb +434 -0
- data/rake/hg.rb +261 -0
- data/rake/manual.rb +782 -0
- data/rake/packaging.rb +144 -0
- data/rake/publishing.rb +318 -0
- data/rake/rdoc.rb +30 -0
- data/rake/style.rb +62 -0
- data/rake/svn.rb +668 -0
- data/rake/testing.rb +187 -0
- data/rake/verifytask.rb +64 -0
- data/rake/win32.rb +190 -0
- data/spec/linguistics/en_spec.rb +215 -0
- data/spec/linguistics/iso639_spec.rb +72 -0
- data/spec/linguistics_spec.rb +107 -0
- data/tests/en/infinitive.tests.rb +207 -0
- data/tests/en/inflect.tests.rb +1389 -0
- data/tests/en/lafcadio.tests.rb +77 -0
- data/tests/en/linkparser.tests.rb +42 -0
- data/tests/en/lprintf.tests.rb +77 -0
- data/tests/en/titlecase.tests.rb +73 -0
- data/tests/en/wordnet.tests.rb +95 -0
- metadata +107 -0
data/LICENSE
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2003-2008, Michael Granger
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice,
|
8
|
+
this list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the author/s, nor the names of the project's
|
15
|
+
contributors may be used to endorse or promote products derived from this
|
16
|
+
software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
|
2
|
+
= Linguistics
|
3
|
+
|
4
|
+
== Authors
|
5
|
+
|
6
|
+
* Michael Granger <ged@FaerieMUD.org>
|
7
|
+
* Martin Chase <stillflame@FaerieMUD.org>
|
8
|
+
|
9
|
+
|
10
|
+
== Requirements
|
11
|
+
|
12
|
+
* Ruby >= 1.8.6
|
13
|
+
|
14
|
+
|
15
|
+
== Optional
|
16
|
+
|
17
|
+
* Ruby-WordNet (>= 0.0.5) - adds integration for the Ruby binding for the
|
18
|
+
WordNet® lexical reference system.
|
19
|
+
|
20
|
+
URL: http://deveiate.org/projects/Ruby-WordNet
|
21
|
+
|
22
|
+
* LinkParser (>= 1.0.5)
|
23
|
+
|
24
|
+
URL: http://deveiate.org/projects/Ruby-LinkParser
|
25
|
+
|
26
|
+
|
27
|
+
== General Information
|
28
|
+
|
29
|
+
Linguistics is a framework for building linguistic utilities for Ruby objects
|
30
|
+
in any language. It includes a generic language-independent front end, a
|
31
|
+
module for mapping language codes into language names, and a module which
|
32
|
+
contains various English-language utilities.
|
33
|
+
|
34
|
+
|
35
|
+
=== Method Interface
|
36
|
+
|
37
|
+
The Linguistics module comes with a language-independent mechanism for
|
38
|
+
extending core Ruby classes with linguistic methods.
|
39
|
+
|
40
|
+
It consists of three parts: a core linguistics module which contains the
|
41
|
+
class-extension framework for languages, a generic inflector class that serves
|
42
|
+
as a delegator for linguistic methods on Ruby objects, and one or more
|
43
|
+
language-specific modules which contain the actual linguistic functions.
|
44
|
+
|
45
|
+
The module works by adding a single instance method for each language named
|
46
|
+
after the language's two-letter code (or three-letter code, if no two-letter
|
47
|
+
code is defined by ISO639) to various Ruby classes. This allows many
|
48
|
+
language-specific methods to be added to objects without cluttering up the
|
49
|
+
interface or risking collision between them, albeit at the cost of three or four
|
50
|
+
more characters per method invocation.
|
51
|
+
|
52
|
+
If you don't like extending core Ruby classes, the language modules should
|
53
|
+
allow you to use them as a function library as well.
|
54
|
+
|
55
|
+
For example, the English-language module contains a #plural function which can
|
56
|
+
be accessed via a method on a core class:
|
57
|
+
|
58
|
+
Linguistics::use( :en )
|
59
|
+
"goose".en.plural
|
60
|
+
# => "geese"
|
61
|
+
|
62
|
+
or via the Linguistics::EN::plural function directly:
|
63
|
+
|
64
|
+
include Linguistics::EN
|
65
|
+
plural( "goose" )
|
66
|
+
# => "geese"
|
67
|
+
|
68
|
+
The class-extension mechanism actually uses the functional interface behind
|
69
|
+
the scenes.
|
70
|
+
|
71
|
+
A new feature with the 0.02 release: You can now omit the language-code method
|
72
|
+
for unambiguous methods by calling Linguistics::use with the +:installProxy+
|
73
|
+
configuration key, with the language code of the language module whose methods
|
74
|
+
you wish to be available. For example, instead of having to call:
|
75
|
+
|
76
|
+
"goose".en.plural
|
77
|
+
|
78
|
+
from the example above, you can now do this:
|
79
|
+
|
80
|
+
Linguistics::use( :en, :installProxy => :en )
|
81
|
+
"goose".plural
|
82
|
+
# => "geese"
|
83
|
+
|
84
|
+
More about how this works in the documentation for Linguistics::use.
|
85
|
+
|
86
|
+
|
87
|
+
==== Adding Language Modules
|
88
|
+
|
89
|
+
To add a new language to the framework, create a file named the same as the
|
90
|
+
ISO639 2- or 3-letter language code for the language you're adding. It must be
|
91
|
+
placed under lib/linguistics/ to be recognized by the linguistics module, but
|
92
|
+
you can also just require it yourself prior to calling Linguistics::use().
|
93
|
+
This file should define a module under Linguistics that is an all-caps version
|
94
|
+
of the code used in the filename. Any methods you wish to be exposed to users
|
95
|
+
should be declared as module functions (i.e., using Module#module_function).
|
96
|
+
|
97
|
+
You may also wish to add your module to the list of default languages by
|
98
|
+
adding the appropriate symbol to the Linguistics::DefaultLanguages array.
|
99
|
+
|
100
|
+
For example, to create a Portuguese-language module, create a file called
|
101
|
+
'lib/linguistics/pt.rb' which contains the following:
|
102
|
+
|
103
|
+
module Linguistics
|
104
|
+
module PT
|
105
|
+
Linguistics::DefaultLanguages << :pt
|
106
|
+
|
107
|
+
module_function
|
108
|
+
<language methods here>
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
See the English language module (lib/linguistics/en.rb) for an example.
|
113
|
+
|
114
|
+
|
115
|
+
=== English Language Module
|
116
|
+
|
117
|
+
See the README.english file for a synopsis.
|
118
|
+
|
119
|
+
The English-language module currently contains linguistic functions ported
|
120
|
+
from a few excellent Perl modules:
|
121
|
+
|
122
|
+
Lingua::EN::Inflect
|
123
|
+
Lingua::Conjunction
|
124
|
+
Lingua::EN::Infinitive
|
125
|
+
|
126
|
+
See the lib/linguistics/en.rb file for specific attributions.
|
127
|
+
|
128
|
+
New with version 0.02: integration with the Ruby WordNet® and LinkParser
|
129
|
+
modules (which must be installed separately).
|
130
|
+
|
131
|
+
|
132
|
+
== To Do
|
133
|
+
|
134
|
+
* I am planning on improving the results from the infinitive functions, which
|
135
|
+
currently return useful results only part of the time. Investigations into
|
136
|
+
additional stemming functions and some other strategies are ongoing.
|
137
|
+
|
138
|
+
* Martin Chase <stillflame at FaerieMUD dot org> is working on an integration
|
139
|
+
module for his excellent work on a Ruby interface to the CMU Link Grammar
|
140
|
+
(an English-sentence parser). This will make writing fairly accurate natural
|
141
|
+
language parsers in Ruby much easier.
|
142
|
+
|
143
|
+
* Suggestions (and patches) for any of these items or additional features are
|
144
|
+
welcomed.
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
== Legal
|
149
|
+
|
150
|
+
This module is Open Source Software which is Copyright (c) 2003 by The
|
151
|
+
FaerieMUD Consortium. All rights reserved.
|
152
|
+
|
153
|
+
You may use, modify, and/or redistribute this software under the terms of the
|
154
|
+
Perl Artistic License, a copy of which should have been included in this
|
155
|
+
distribution (See the file Artistic). If it was not, a copy of it may be
|
156
|
+
obtained from http://language.perl.com/misc/Artistic.html or
|
157
|
+
http://www.faeriemud.org/artistic.html.
|
158
|
+
|
159
|
+
THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
160
|
+
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
161
|
+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
162
|
+
|
163
|
+
|
164
|
+
$Id$
|
165
|
+
|
166
|
+
|
data/README.english
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
|
2
|
+
= English Ruby Linguistics Module - Synopsis
|
3
|
+
|
4
|
+
This is an overview of the functionality currently in the English functions of
|
5
|
+
the Ruby Linguistics module as of version 0.02:
|
6
|
+
|
7
|
+
|
8
|
+
== Pluralization
|
9
|
+
|
10
|
+
require 'linguistics'
|
11
|
+
Linguistics::use( :en ) # extends Array, String, and Numeric
|
12
|
+
|
13
|
+
"box".en.plural
|
14
|
+
# => "boxes"
|
15
|
+
|
16
|
+
"mouse".en.plural
|
17
|
+
# => "mice"
|
18
|
+
|
19
|
+
"ruby".en.plural
|
20
|
+
# => "rubies"
|
21
|
+
|
22
|
+
|
23
|
+
== Indefinite Articles
|
24
|
+
|
25
|
+
"book".en.a
|
26
|
+
# => "a book"
|
27
|
+
|
28
|
+
"article".en.a
|
29
|
+
# => "an article"
|
30
|
+
|
31
|
+
|
32
|
+
== Present Participles
|
33
|
+
|
34
|
+
"runs".en.present_participle
|
35
|
+
# => "running"
|
36
|
+
|
37
|
+
"eats".en.present_participle
|
38
|
+
# => "eating"
|
39
|
+
|
40
|
+
"spies".en.present_participle
|
41
|
+
# => "spying"
|
42
|
+
|
43
|
+
|
44
|
+
== Ordinal Numbers
|
45
|
+
|
46
|
+
5.en.ordinal
|
47
|
+
# => "5th"
|
48
|
+
|
49
|
+
2004.en.ordinal
|
50
|
+
# => "2004th"
|
51
|
+
|
52
|
+
|
53
|
+
== Numbers to Words
|
54
|
+
|
55
|
+
5.en.numwords
|
56
|
+
# => "five"
|
57
|
+
|
58
|
+
2004.en.numwords
|
59
|
+
# => "two thousand and four"
|
60
|
+
|
61
|
+
2385762345876.en.numwords
|
62
|
+
# => "two trillion, three hundred and eighty-five billion,
|
63
|
+
seven hundred and sixty-two million, three hundred and
|
64
|
+
forty-five thousand, eight hundred and seventy-six"
|
65
|
+
|
66
|
+
|
67
|
+
== Quantification
|
68
|
+
|
69
|
+
"cow".en.quantify( 5 )
|
70
|
+
# => "several cows"
|
71
|
+
|
72
|
+
"cow".en.quantify( 1005 )
|
73
|
+
# => "thousands of cows"
|
74
|
+
|
75
|
+
"cow".en.quantify( 20_432_123_000_000 )
|
76
|
+
# => "tens of trillions of cows"
|
77
|
+
|
78
|
+
|
79
|
+
== Conjunctions
|
80
|
+
|
81
|
+
animals = %w{dog cow ox chicken goose goat cow dog rooster llama
|
82
|
+
pig goat dog cat cat dog cow goat goose goose ox alpaca}
|
83
|
+
puts "The farm has: " + animals.en.conjunction
|
84
|
+
|
85
|
+
# => The farm has: four dogs, three cows, three geese, three goats,
|
86
|
+
two oxen, two cats, a chicken, a rooster, a llama, a pig,
|
87
|
+
and an alpaca
|
88
|
+
|
89
|
+
Note that 'goose' and 'ox' are both correctly pluralized, and the correct
|
90
|
+
indefinite article 'an' has been used for 'alpaca'.
|
91
|
+
|
92
|
+
You can also use the generalization function of the #quantify method to give
|
93
|
+
general descriptions of object lists instead of literal counts:
|
94
|
+
|
95
|
+
allobjs = []
|
96
|
+
ObjectSpace::each_object {|obj| allobjs << obj.class.name}
|
97
|
+
|
98
|
+
puts "The current Ruby objectspace contains: " +
|
99
|
+
allobjs.en.conjunction( :generalize => true )
|
100
|
+
|
101
|
+
which will print something like:
|
102
|
+
|
103
|
+
The current Ruby objectspace contains: thousands of Strings,
|
104
|
+
thousands of Arrays, hundreds of Hashes, hundreds of
|
105
|
+
Classes, many Regexps, a number of Ranges, a number of
|
106
|
+
Modules, several Floats, several Procs, several MatchDatas,
|
107
|
+
several Objects, several IOS, several Files, a Binding, a
|
108
|
+
NoMemoryError, a SystemStackError, a fatal, a ThreadGroup,
|
109
|
+
and a Thread
|
110
|
+
|
111
|
+
|
112
|
+
== Infinitives
|
113
|
+
|
114
|
+
New in version 0.02:
|
115
|
+
|
116
|
+
"leaving".en.infinitive
|
117
|
+
# => "leave"
|
118
|
+
|
119
|
+
"left".en.infinitive
|
120
|
+
# => "leave"
|
121
|
+
|
122
|
+
"leaving".en.infinitive.suffix
|
123
|
+
# => "ing"
|
124
|
+
|
125
|
+
|
126
|
+
== WordNet® Integration
|
127
|
+
|
128
|
+
Also new in version 0.02, if you have the Ruby-WordNet module installed, you can
|
129
|
+
look up WordNet synsets using the Linguistics interface:
|
130
|
+
|
131
|
+
# Test to be sure the WordNet module loaded okay.
|
132
|
+
Linguistics::EN.has_wordnet?
|
133
|
+
# => true
|
134
|
+
|
135
|
+
# Fetch the default synset for the word "balance"
|
136
|
+
"balance".en.synset
|
137
|
+
# => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
|
138
|
+
(derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
|
139
|
+
|
140
|
+
# Fetch the synset for the first verb sense of "balance"
|
141
|
+
"balance".en.synset( :verb )
|
142
|
+
# => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
|
143
|
+
(verb): "bring into balance or equilibrium; "She has to balance work and her
|
144
|
+
domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
|
145
|
+
verbGroups: 2, hypernyms: 1, hyponyms: 5)>
|
146
|
+
|
147
|
+
# Fetch the second noun sense
|
148
|
+
"balance".en.synset( 2, :noun )
|
149
|
+
# => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
|
150
|
+
on pull of gravity" (hypernyms: 1, hyponyms: 5)>
|
151
|
+
|
152
|
+
# Fetch the second noun sense's hypernyms (more-general words, like a superclass)
|
153
|
+
"balance".en.synset( 2, :noun ).hypernyms
|
154
|
+
# => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
155
|
+
instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
156
|
+
hyponyms: 2)>]
|
157
|
+
|
158
|
+
# A simpler way of doing the same thing:
|
159
|
+
"balance".en.hypernyms( 2, :noun )
|
160
|
+
# => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
161
|
+
instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
162
|
+
hyponyms: 2)>]
|
163
|
+
|
164
|
+
# Fetch the first hypernym's hypernyms
|
165
|
+
"balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
|
166
|
+
# => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
|
167
|
+
measuring device (noun): "instrument that shows the extent or amount or quantity
|
168
|
+
or degree of something" (hypernyms: 1, hyponyms: 83)>]
|
169
|
+
|
170
|
+
# Find the synset to which both the second noun sense of "balance" and the
|
171
|
+
# default sense of "shovel" belong.
|
172
|
+
("balance".en.synset( 2, :noun ) | "shovel".en.synset)
|
173
|
+
# => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
|
174
|
+
artifact (or system of artifacts) that is instrumental in accomplishing some
|
175
|
+
end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
|
176
|
+
|
177
|
+
# Fetch just the words for the other kinds of "instruments"
|
178
|
+
"instrument".en.hyponyms.collect {|synset| synset.words}.flatten
|
179
|
+
# => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
|
180
|
+
"extractor", "instrument of execution", "instrument of punishment", "measuring
|
181
|
+
instrument", "measuring system", "measuring device", "medical instrument",
|
182
|
+
"navigational instrument", "optical instrument", "plotter", "scientific
|
183
|
+
instrument", "sonograph", "surveying instrument", "surveyor's instrument",
|
184
|
+
"tracer", "weapon", "arm", "weapon system", "whip"]
|
185
|
+
|
186
|
+
There are many more WordNet methods supported — too many to list here. See the
|
187
|
+
documentation for the complete list.
|
188
|
+
|
189
|
+
|
190
|
+
== LinkParser Integration
|
191
|
+
|
192
|
+
Another new feature in version 0.02 is integration with the Ruby version of the
|
193
|
+
CMU Link Grammar Parser by Martin Chase. If you have the LinkParser module
|
194
|
+
installed, you can create linkages from English sentences that let you query for
|
195
|
+
parts of speech:
|
196
|
+
|
197
|
+
# Test to see whether or not the link parser is loaded.
|
198
|
+
Linguistics::EN.has_link_parser?
|
199
|
+
# => true
|
200
|
+
|
201
|
+
# Diagram the first linkage for a test sentence
|
202
|
+
puts "he is a big dog".en.sentence.linkages.first.to_s
|
203
|
+
+---O*---+
|
204
|
+
| +--Ds--+
|
205
|
+
+Ss+ | +-A-+
|
206
|
+
| | | | |
|
207
|
+
he is a big dog
|
208
|
+
|
209
|
+
# Find the verb in the sentence
|
210
|
+
"he is a big dog".en.sentence.verb.to_s
|
211
|
+
# => "is"
|
212
|
+
|
213
|
+
# Combined infinitive + LinkParser: Find the infinitive form of the verb of the
|
214
|
+
# given sentence.
|
215
|
+
"he is a big dog".en.sentence.verb.infinitive
|
216
|
+
# => "be"
|
217
|
+
|
218
|
+
# Find the direct object of the sentence
|
219
|
+
"he is a big dog".en.sentence.object.to_s
|
220
|
+
# => "dog"
|
221
|
+
|
222
|
+
# Look at the raw LinkParser::Word for the direct object of the sentence.
|
223
|
+
"he is a big dog".en.sentence.object
|
224
|
+
# => #<LinkParser::Word:0x403da0a0 @definition=[[{@A-}, Ds-, {@M+}, J-], [{@A-},
|
225
|
+
Ds-, {@M+}, Os-], [{@A-}, Ds-, {@M+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-, {@M+},
|
226
|
+
Ss+, R-], [{@A-}, Ds-, {@M+}, SIs-], [{@A-}, Ds-, {R+}, {Bs+}, J-], [{@A-}, Ds-,
|
227
|
+
{R+}, {Bs+}, Os-], [{@A-}, Ds-, {R+}, {Bs+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-,
|
228
|
+
{R+}, {Bs+}, Ss+, R-], [{@A-}, Ds-, {R+}, {Bs+}, SIs-]], @right=[], @suffix="",
|
229
|
+
@left=[#<LinkParser::Connection:0x403da028 @rword=#<LinkParser::Word:0x403da0a0
|
230
|
+
...>, @lword=#<LinkParser::Word:0x403da0b4 @definition=[[Ss-, O+, {@MV+}], [Ss-,
|
231
|
+
B-, {@MV+}], [Ss-, P+], [Ss-, AF-], [RS-, Bs-, O+, {@MV+}], [RS-, Bs-, B-,
|
232
|
+
{@MV+}], [RS-, Bs-, P+], [RS-, Bs-, AF-], [{Q-}, SIs+, O+, {@MV+}], [{Q-}, SIs+,
|
233
|
+
B-, {@MV+}], [{Q-}, SIs+, P+], [{Q-}, SIs+, AF-]],
|
234
|
+
@right=[#<LinkParser::Connection:0x403da028 ...>], @suffix="", @left=[],
|
235
|
+
@name="is", @position=1>, @subName="*", @name="O", @length=3>], @name="dog",
|
236
|
+
@position=4>
|
237
|
+
|
238
|
+
# Combine WordNet + LinkParser to find the definition of the direct object of
|
239
|
+
# the sentence
|
240
|
+
"he is a big dog".en.sentence.object.gloss
|
241
|
+
# => "a member of the genus Canis (probably descended from the common wolf) that
|
242
|
+
has been domesticated by man since prehistoric times; occurs in many breeds;
|
243
|
+
\"the dog barked all night\""
|
244
|
+
|
245
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,338 @@
|
|
1
|
+
#!rake
|
2
|
+
#
|
3
|
+
# Linguistics rakefile
|
4
|
+
#
|
5
|
+
# Based on various other Rakefiles, especially one by Ben Bleything
|
6
|
+
#
|
7
|
+
# Copyright (c) 2007-2009 The FaerieMUD Consortium
|
8
|
+
#
|
9
|
+
# Authors:
|
10
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
11
|
+
#
|
12
|
+
|
13
|
+
BEGIN {
|
14
|
+
require 'pathname'
|
15
|
+
basedir = Pathname.new( __FILE__ ).dirname
|
16
|
+
|
17
|
+
libdir = basedir + "lib"
|
18
|
+
extdir = basedir + "ext"
|
19
|
+
|
20
|
+
$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
|
21
|
+
$LOAD_PATH.unshift( extdir.to_s ) unless $LOAD_PATH.include?( extdir.to_s )
|
22
|
+
}
|
23
|
+
|
24
|
+
begin
|
25
|
+
require 'readline'
|
26
|
+
include Readline
|
27
|
+
rescue LoadError
|
28
|
+
# Fall back to a plain prompt
|
29
|
+
def readline( text )
|
30
|
+
$stderr.print( text.chomp )
|
31
|
+
return $stdin.gets
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
require 'rbconfig'
|
36
|
+
require 'rake'
|
37
|
+
require 'rake/testtask'
|
38
|
+
require 'rake/packagetask'
|
39
|
+
require 'rake/clean'
|
40
|
+
# require 'rake/191_compat.rb'
|
41
|
+
|
42
|
+
$dryrun = false
|
43
|
+
|
44
|
+
### Config constants
|
45
|
+
BASEDIR = Pathname.new( __FILE__ ).dirname.relative_path_from( Pathname.getwd )
|
46
|
+
BINDIR = BASEDIR + 'bin'
|
47
|
+
LIBDIR = BASEDIR + 'lib'
|
48
|
+
EXTDIR = BASEDIR + 'ext'
|
49
|
+
DOCSDIR = BASEDIR + 'docs'
|
50
|
+
PKGDIR = BASEDIR + 'pkg'
|
51
|
+
DATADIR = BASEDIR + 'data'
|
52
|
+
|
53
|
+
MANUALDIR = DOCSDIR + 'manual'
|
54
|
+
|
55
|
+
PROJECT_NAME = 'Linguistics'
|
56
|
+
PKG_NAME = PROJECT_NAME.downcase
|
57
|
+
PKG_SUMMARY = 'a framework for building linguistic utilities for Ruby objects'
|
58
|
+
|
59
|
+
# Cruisecontrol stuff
|
60
|
+
CC_BUILD_LABEL = ENV['CC_BUILD_LABEL']
|
61
|
+
CC_BUILD_ARTIFACTS = ENV['CC_BUILD_ARTIFACTS'] || 'artifacts'
|
62
|
+
|
63
|
+
VERSION_FILE = LIBDIR + 'linguistics.rb'
|
64
|
+
if VERSION_FILE.exist? && buildrev = ENV['CC_BUILD_LABEL']
|
65
|
+
PKG_VERSION = VERSION_FILE.read[ /VERSION\s*=\s*['"](\d+\.\d+\.\d+)['"]/, 1 ] + '.' + buildrev
|
66
|
+
elsif VERSION_FILE.exist?
|
67
|
+
PKG_VERSION = VERSION_FILE.read[ /VERSION\s*=\s*['"](\d+\.\d+\.\d+)['"]/, 1 ]
|
68
|
+
else
|
69
|
+
PKG_VERSION = '0.0.0'
|
70
|
+
end
|
71
|
+
|
72
|
+
PKG_FILE_NAME = "#{PKG_NAME.downcase}-#{PKG_VERSION}"
|
73
|
+
GEM_FILE_NAME = "#{PKG_FILE_NAME}.gem"
|
74
|
+
|
75
|
+
# Universal VCS constants
|
76
|
+
DEFAULT_EDITOR = 'vi'
|
77
|
+
COMMIT_MSG_FILE = 'commit-msg.txt'
|
78
|
+
FILE_INDENT = " " * 12
|
79
|
+
LOG_INDENT = " " * 3
|
80
|
+
|
81
|
+
EXTCONF = EXTDIR + 'extconf.rb'
|
82
|
+
|
83
|
+
ARTIFACTS_DIR = Pathname.new( CC_BUILD_ARTIFACTS )
|
84
|
+
|
85
|
+
TEXT_FILES = Rake::FileList.new( %w[Rakefile ChangeLog README LICENSE] )
|
86
|
+
BIN_FILES = Rake::FileList.new( "#{BINDIR}/*" )
|
87
|
+
LIB_FILES = Rake::FileList.new( "#{LIBDIR}/**/*.rb" )
|
88
|
+
EXT_FILES = Rake::FileList.new( "#{EXTDIR}/**/*.{c,h,rb}" )
|
89
|
+
DATA_FILES = Rake::FileList.new( "#{DATADIR}/**/*" )
|
90
|
+
|
91
|
+
SPECDIR = BASEDIR + 'spec'
|
92
|
+
SPECLIBDIR = SPECDIR + 'lib'
|
93
|
+
SPEC_FILES = Rake::FileList.new( "#{SPECDIR}/**/*_spec.rb", "#{SPECLIBDIR}/**/*.rb" )
|
94
|
+
|
95
|
+
TESTDIR = BASEDIR + 'tests'
|
96
|
+
TEST_FILES = Rake::FileList.new( "#{TESTDIR}/**/*.tests.rb" )
|
97
|
+
|
98
|
+
RAKE_TASKDIR = BASEDIR + 'rake'
|
99
|
+
RAKE_TASKLIBS = Rake::FileList.new( "#{RAKE_TASKDIR}/*.rb" )
|
100
|
+
PKG_TASKLIBS = Rake::FileList.new( "#{RAKE_TASKDIR}/{191_compat,helpers,packaging,rdoc,testing}.rb" )
|
101
|
+
PKG_TASKLIBS.include( "#{RAKE_TASKDIR}/manual.rb" ) if MANUALDIR.exist?
|
102
|
+
|
103
|
+
RAKE_TASKLIBS_URL = 'http://repo.deveiate.org/rake-tasklibs'
|
104
|
+
|
105
|
+
LOCAL_RAKEFILE = BASEDIR + 'Rakefile.local'
|
106
|
+
|
107
|
+
EXTRA_PKGFILES = Rake::FileList.new
|
108
|
+
EXTRA_PKGFILES.include( "#{BASEDIR}/examples/*.rb" )
|
109
|
+
EXTRA_PKGFILES.include( "#{BASEDIR}/README.english" )
|
110
|
+
|
111
|
+
RELEASE_FILES = TEXT_FILES +
|
112
|
+
SPEC_FILES +
|
113
|
+
TEST_FILES +
|
114
|
+
BIN_FILES +
|
115
|
+
LIB_FILES +
|
116
|
+
EXT_FILES +
|
117
|
+
DATA_FILES +
|
118
|
+
RAKE_TASKLIBS +
|
119
|
+
EXTRA_PKGFILES
|
120
|
+
|
121
|
+
|
122
|
+
RELEASE_FILES << LOCAL_RAKEFILE.to_s if LOCAL_RAKEFILE.exist?
|
123
|
+
|
124
|
+
COVERAGE_MINIMUM = ENV['COVERAGE_MINIMUM'] ? Float( ENV['COVERAGE_MINIMUM'] ) : 85.0
|
125
|
+
RCOV_EXCLUDES = 'spec,tests,/Library/Ruby,/var/lib,/usr/local/lib'
|
126
|
+
RCOV_OPTS = [
|
127
|
+
'--exclude', RCOV_EXCLUDES,
|
128
|
+
'--xrefs',
|
129
|
+
'--save',
|
130
|
+
'--callsites',
|
131
|
+
#'--aggregate', 'coverage.data' # <- doesn't work as of 0.8.1.2.0
|
132
|
+
]
|
133
|
+
|
134
|
+
|
135
|
+
### Load some task libraries that need to be loaded early
|
136
|
+
if !RAKE_TASKDIR.exist?
|
137
|
+
$stderr.puts "It seems you don't have the build task directory. Shall I fetch it "
|
138
|
+
ans = readline( "for you? [y]" )
|
139
|
+
ans = 'y' if !ans.nil? && ans.empty?
|
140
|
+
|
141
|
+
if ans =~ /^y/i
|
142
|
+
$stderr.puts "Okay, fetching #{RAKE_TASKLIBS_URL} into #{RAKE_TASKDIR}..."
|
143
|
+
system 'hg', 'clone', RAKE_TASKLIBS_URL, RAKE_TASKDIR
|
144
|
+
if ! $?.success?
|
145
|
+
fail "Damn. That didn't work. Giving up; maybe try manually fetching?"
|
146
|
+
end
|
147
|
+
else
|
148
|
+
$stderr.puts "Then I'm afraid I can't continue. Best of luck."
|
149
|
+
fail "Rake tasklibs not present."
|
150
|
+
end
|
151
|
+
|
152
|
+
RAKE_TASKLIBS.include( "#{RAKE_TASKDIR}/*.rb" )
|
153
|
+
end
|
154
|
+
|
155
|
+
require RAKE_TASKDIR + 'helpers.rb'
|
156
|
+
|
157
|
+
# Define some constants that depend on the Mercurial ('hg') executable
|
158
|
+
if hg = which( 'hg' )
|
159
|
+
id = IO.read('|-') or exec hg.to_s, 'id', '-n'
|
160
|
+
PKG_BUILD = id.chomp[ /^[[:xdigit:]]+/ ]
|
161
|
+
else
|
162
|
+
PKG_BUILD = 0
|
163
|
+
end
|
164
|
+
SNAPSHOT_PKG_NAME = "#{PKG_FILE_NAME}.#{PKG_BUILD}"
|
165
|
+
SNAPSHOT_GEM_NAME = "#{SNAPSHOT_PKG_NAME}.gem"
|
166
|
+
|
167
|
+
# Documentation constants
|
168
|
+
RDOCDIR = DOCSDIR + 'api'
|
169
|
+
RDOC_OPTIONS = [
|
170
|
+
'-w', '4',
|
171
|
+
'-HN',
|
172
|
+
'-i', '.',
|
173
|
+
'-m', 'README',
|
174
|
+
'-t', PKG_NAME,
|
175
|
+
'-W', 'http://deveiate.org/projects/Linguistics/browser/'
|
176
|
+
]
|
177
|
+
|
178
|
+
# Release constants
|
179
|
+
SMTP_HOST = 'mail.faeriemud.org'
|
180
|
+
SMTP_PORT = 465 # SMTP + SSL
|
181
|
+
|
182
|
+
# Project constants
|
183
|
+
PROJECT_HOST = 'deveiate'
|
184
|
+
PROJECT_PUBDIR = '/usr/local/www/public/code'
|
185
|
+
PROJECT_DOCDIR = "#{PROJECT_PUBDIR}/#{PKG_NAME}"
|
186
|
+
PROJECT_SCPPUBURL = "#{PROJECT_HOST}:#{PROJECT_PUBDIR}"
|
187
|
+
PROJECT_SCPDOCURL = "#{PROJECT_HOST}:#{PROJECT_DOCDIR}"
|
188
|
+
|
189
|
+
# Rubyforge stuff
|
190
|
+
RUBYFORGE_GROUP = 'deveiate'
|
191
|
+
RUBYFORGE_PROJECT = 'linguistics'
|
192
|
+
|
193
|
+
# Gem dependencies: gemname => version
|
194
|
+
DEPENDENCIES = {
|
195
|
+
}
|
196
|
+
|
197
|
+
# Developer Gem dependencies: gemname => version
|
198
|
+
DEVELOPMENT_DEPENDENCIES = {
|
199
|
+
'rake' => '>= 0.8.7',
|
200
|
+
'rcodetools' => '>= 0.7.0.0',
|
201
|
+
'rcov' => '>= 0.8.1.2.0',
|
202
|
+
'rdoc' => '>= 2.4.3',
|
203
|
+
'RedCloth' => '>= 4.0.3',
|
204
|
+
'rspec' => '>= 1.2.6',
|
205
|
+
'rubyforge' => '>= 0',
|
206
|
+
'termios' => '>= 0',
|
207
|
+
'text-format' => '>= 1.0.0',
|
208
|
+
'tmail' => '>= 1.2.3.1',
|
209
|
+
'diff-lcs' => '>= 1.1.2',
|
210
|
+
'wordnet' => '>=0.0.5',
|
211
|
+
'linkparser' => '>=1.0.3',
|
212
|
+
}
|
213
|
+
|
214
|
+
# Non-gem requirements: packagename => version
|
215
|
+
REQUIREMENTS = {
|
216
|
+
}
|
217
|
+
|
218
|
+
# RubyGem specification
|
219
|
+
GEMSPEC = Gem::Specification.new do |gem|
|
220
|
+
gem.name = PKG_NAME.downcase
|
221
|
+
gem.version = PKG_VERSION
|
222
|
+
|
223
|
+
gem.summary = PKG_SUMMARY
|
224
|
+
gem.description = [
|
225
|
+
"in any language. It includes a generic language-independant front end, a",
|
226
|
+
"module for mapping language codes into language names, and a module which",
|
227
|
+
"contains various English-language utilities.",
|
228
|
+
].join( "\n" )
|
229
|
+
|
230
|
+
gem.authors = "Michael Granger"
|
231
|
+
gem.email = ["ged@FaerieMUD.org"]
|
232
|
+
gem.homepage = 'http://deveiate.org/projects/Linguistics/'
|
233
|
+
|
234
|
+
# Apparently this isn't actually the 'project'?
|
235
|
+
gem.rubyforge_project = RUBYFORGE_GROUP
|
236
|
+
|
237
|
+
gem.has_rdoc = true
|
238
|
+
gem.rdoc_options = RDOC_OPTIONS
|
239
|
+
gem.extra_rdoc_files = %w[ChangeLog README LICENSE]
|
240
|
+
|
241
|
+
gem.bindir = BINDIR.relative_path_from(BASEDIR).to_s
|
242
|
+
gem.executables = BIN_FILES.select {|pn| File.executable?(pn) }.
|
243
|
+
collect {|pn| File.basename(pn) }
|
244
|
+
gem.require_paths << EXTDIR.relative_path_from( BASEDIR ).to_s if EXTDIR.exist?
|
245
|
+
|
246
|
+
if EXTCONF.exist?
|
247
|
+
gem.extensions << EXTCONF.relative_path_from( BASEDIR ).to_s
|
248
|
+
end
|
249
|
+
|
250
|
+
gem.files = RELEASE_FILES
|
251
|
+
gem.test_files = SPEC_FILES
|
252
|
+
|
253
|
+
DEPENDENCIES.each do |name, version|
|
254
|
+
version = '>= 0' if version.length.zero?
|
255
|
+
gem.add_runtime_dependency( name, version )
|
256
|
+
end
|
257
|
+
|
258
|
+
REQUIREMENTS.each do |name, version|
|
259
|
+
gem.requirements << [ name, version ].compact.join(' ')
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
$trace = Rake.application.options.trace ? true : false
|
264
|
+
$dryrun = Rake.application.options.dryrun ? true : false
|
265
|
+
$include_dev_dependencies = false
|
266
|
+
|
267
|
+
# Load any remaining task libraries
|
268
|
+
RAKE_TASKLIBS.each do |tasklib|
|
269
|
+
next if tasklib.to_s =~ %r{/helpers\.rb$}
|
270
|
+
begin
|
271
|
+
trace " loading tasklib %s" % [ tasklib ]
|
272
|
+
import tasklib
|
273
|
+
rescue ScriptError => err
|
274
|
+
fail "Task library '%s' failed to load: %s: %s" %
|
275
|
+
[ tasklib, err.class.name, err.message ]
|
276
|
+
trace "Backtrace: \n " + err.backtrace.join( "\n " )
|
277
|
+
rescue => err
|
278
|
+
log "Task library '%s' failed to load: %s: %s. Some tasks may not be available." %
|
279
|
+
[ tasklib, err.class.name, err.message ]
|
280
|
+
trace "Backtrace: \n " + err.backtrace.join( "\n " )
|
281
|
+
end
|
282
|
+
end
|
283
|
+
|
284
|
+
# Load any project-specific rules defined in 'Rakefile.local' if it exists
|
285
|
+
import LOCAL_RAKEFILE if LOCAL_RAKEFILE.exist?
|
286
|
+
|
287
|
+
|
288
|
+
#####################################################################
|
289
|
+
### T A S K S
|
290
|
+
#####################################################################
|
291
|
+
|
292
|
+
### Default task
|
293
|
+
task :default => [:clean, :local, :spec, :rdoc, :package]
|
294
|
+
|
295
|
+
### Task the local Rakefile can append to -- no-op by default
|
296
|
+
task :local
|
297
|
+
|
298
|
+
### Task: clean
|
299
|
+
CLEAN.include 'coverage'
|
300
|
+
CLOBBER.include 'artifacts', 'coverage.info', PKGDIR
|
301
|
+
|
302
|
+
### Task: changelog
|
303
|
+
file 'ChangeLog' do |task|
|
304
|
+
log "Updating #{task.name}"
|
305
|
+
|
306
|
+
changelog = make_changelog()
|
307
|
+
File.open( task.name, 'w' ) do |fh|
|
308
|
+
fh.print( changelog )
|
309
|
+
end
|
310
|
+
end
|
311
|
+
|
312
|
+
|
313
|
+
### Task: cruise (Cruisecontrol task)
|
314
|
+
desc "Cruisecontrol build"
|
315
|
+
task :cruise => [:clean, 'spec:quiet', :package] do |task|
|
316
|
+
raise "Artifacts dir not set." if ARTIFACTS_DIR.to_s.empty?
|
317
|
+
artifact_dir = ARTIFACTS_DIR.cleanpath + (CC_BUILD_LABEL || Time.now.strftime('%Y%m%d-%T'))
|
318
|
+
artifact_dir.mkpath
|
319
|
+
|
320
|
+
coverage = BASEDIR + 'coverage'
|
321
|
+
if coverage.exist? && coverage.directory?
|
322
|
+
$stderr.puts "Copying coverage stats..."
|
323
|
+
FileUtils.cp_r( 'coverage', artifact_dir )
|
324
|
+
end
|
325
|
+
|
326
|
+
$stderr.puts "Copying packages..."
|
327
|
+
FileUtils.cp_r( FileList['pkg/*'].to_a, artifact_dir )
|
328
|
+
end
|
329
|
+
|
330
|
+
|
331
|
+
desc "Update the build system to the latest version"
|
332
|
+
task :update_build do
|
333
|
+
log "Updating the build system"
|
334
|
+
run 'hg', '-R', RAKE_TASKDIR, 'pull', '-u'
|
335
|
+
log "Updating the Rakefile"
|
336
|
+
sh 'rake', '-f', RAKE_TASKDIR + 'Metarakefile'
|
337
|
+
end
|
338
|
+
|