linguistics 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +640 -0
- data/LICENSE +27 -0
- data/README +166 -0
- data/README.english +245 -0
- data/Rakefile +338 -0
- data/examples/generalize_sentence.rb +46 -0
- data/lib/linguistics.rb +366 -0
- data/lib/linguistics/en.rb +1728 -0
- data/lib/linguistics/en/infinitive.rb +1145 -0
- data/lib/linguistics/en/linkparser.rb +109 -0
- data/lib/linguistics/en/wordnet.rb +257 -0
- data/lib/linguistics/iso639.rb +461 -0
- data/rake/191_compat.rb +26 -0
- data/rake/dependencies.rb +76 -0
- data/rake/helpers.rb +434 -0
- data/rake/hg.rb +261 -0
- data/rake/manual.rb +782 -0
- data/rake/packaging.rb +144 -0
- data/rake/publishing.rb +318 -0
- data/rake/rdoc.rb +30 -0
- data/rake/style.rb +62 -0
- data/rake/svn.rb +668 -0
- data/rake/testing.rb +187 -0
- data/rake/verifytask.rb +64 -0
- data/rake/win32.rb +190 -0
- data/spec/linguistics/en_spec.rb +215 -0
- data/spec/linguistics/iso639_spec.rb +72 -0
- data/spec/linguistics_spec.rb +107 -0
- data/tests/en/infinitive.tests.rb +207 -0
- data/tests/en/inflect.tests.rb +1389 -0
- data/tests/en/lafcadio.tests.rb +77 -0
- data/tests/en/linkparser.tests.rb +42 -0
- data/tests/en/lprintf.tests.rb +77 -0
- data/tests/en/titlecase.tests.rb +73 -0
- data/tests/en/wordnet.tests.rb +95 -0
- metadata +107 -0
data/LICENSE
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2003-2008, Michael Granger
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice,
|
8
|
+
this list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the author/s, nor the names of the project's
|
15
|
+
contributors may be used to endorse or promote products derived from this
|
16
|
+
software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
|
2
|
+
= Linguistics
|
3
|
+
|
4
|
+
== Authors
|
5
|
+
|
6
|
+
* Michael Granger <ged@FaerieMUD.org>
|
7
|
+
* Martin Chase <stillflame@FaerieMUD.org>
|
8
|
+
|
9
|
+
|
10
|
+
== Requirements
|
11
|
+
|
12
|
+
* Ruby >= 1.8.6
|
13
|
+
|
14
|
+
|
15
|
+
== Optional
|
16
|
+
|
17
|
+
* Ruby-WordNet (>= 0.0.5) - adds integration for the Ruby binding for the
|
18
|
+
WordNet® lexical reference system.
|
19
|
+
|
20
|
+
URL: http://deveiate.org/projects/Ruby-WordNet
|
21
|
+
|
22
|
+
* LinkParser (>= 1.0.5)
|
23
|
+
|
24
|
+
URL: http://deveiate.org/projects/Ruby-LinkParser
|
25
|
+
|
26
|
+
|
27
|
+
== General Information
|
28
|
+
|
29
|
+
Linguistics is a framework for building linguistic utilities for Ruby objects
|
30
|
+
in any language. It includes a generic language-independent front end, a
|
31
|
+
module for mapping language codes into language names, and a module which
|
32
|
+
contains various English-language utilities.
|
33
|
+
|
34
|
+
|
35
|
+
=== Method Interface
|
36
|
+
|
37
|
+
The Linguistics module comes with a language-independent mechanism for
|
38
|
+
extending core Ruby classes with linguistic methods.
|
39
|
+
|
40
|
+
It consists of three parts: a core linguistics module which contains the
|
41
|
+
class-extension framework for languages, a generic inflector class that serves
|
42
|
+
as a delegator for linguistic methods on Ruby objects, and one or more
|
43
|
+
language-specific modules which contain the actual linguistic functions.
|
44
|
+
|
45
|
+
The module works by adding a single instance method for each language named
|
46
|
+
after the language's two-letter code (or three-letter code, if no two-letter
|
47
|
+
code is defined by ISO639) to various Ruby classes. This allows many
|
48
|
+
language-specific methods to be added to objects without cluttering up the
|
49
|
+
interface or risking collision between them, albeit at the cost of three or four
|
50
|
+
more characters per method invocation.
|
51
|
+
|
52
|
+
If you don't like extending core Ruby classes, the language modules should
|
53
|
+
also allow you to use them as a function library.
|
54
|
+
|
55
|
+
For example, the English-language module contains a #plural function which can
|
56
|
+
be accessed via a method on a core class:
|
57
|
+
|
58
|
+
Linguistics::use( :en )
|
59
|
+
"goose".en.plural
|
60
|
+
# => "geese"
|
61
|
+
|
62
|
+
or via the Linguistics::EN::plural function directly:
|
63
|
+
|
64
|
+
include Linguistics::EN
|
65
|
+
plural( "goose" )
|
66
|
+
# => "geese"
|
67
|
+
|
68
|
+
The class-extension mechanism actually uses the functional interface behind
|
69
|
+
the scenes.
|
70
|
+
|
71
|
+
A new feature with the 0.02 release: You can now omit the language-code method
|
72
|
+
for unambiguous methods by calling Linguistics::use with the +:installProxy+
|
73
|
+
configuration key, with the language code of the language module whose methods
|
74
|
+
you wish to be available. For example, instead of having to call:
|
75
|
+
|
76
|
+
"goose".en.plural
|
77
|
+
|
78
|
+
from the example above, you can now do this:
|
79
|
+
|
80
|
+
Linguistics::use( :en, :installProxy => :en )
|
81
|
+
"goose".plural
|
82
|
+
# => "geese"
|
83
|
+
|
84
|
+
More about how this works in the documentation for Linguistics::use.
|
85
|
+
|
86
|
+
|
87
|
+
==== Adding Language Modules
|
88
|
+
|
89
|
+
To add a new language to the framework, create a file named the same as the
|
90
|
+
ISO639 2- or 3-letter language code for the language you're adding. It must be
|
91
|
+
placed under lib/linguistics/ to be recognized by the linguistics module, but
|
92
|
+
you can also just require it yourself prior to calling Linguistics::use().
|
93
|
+
This file should define a module under Linguistics that is an all-caps version
|
94
|
+
of the code used in the filename. Any methods you wish to be exposed to users
|
95
|
+
should be declared as module functions (i.e., using Module#module_function).
|
96
|
+
|
97
|
+
You may also wish to add your module to the list of default languages by
|
98
|
+
adding the appropriate symbol to the Linguistics::DefaultLanguages array.
|
99
|
+
|
100
|
+
For example, to create a Portuguese-language module, create a file called
|
101
|
+
'lib/linguistics/pt.rb' which contains the following:
|
102
|
+
|
103
|
+
module Linguistics
|
104
|
+
module PT
|
105
|
+
Linguistics::DefaultLanguages << :pt
|
106
|
+
|
107
|
+
module_function
|
108
|
+
<language methods here>
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
See the English language module (lib/linguistics/en.rb) for an example.
|
113
|
+
|
114
|
+
|
115
|
+
=== English Language Module
|
116
|
+
|
117
|
+
See the README.english file for a synopsis.
|
118
|
+
|
119
|
+
The English-language module currently contains linguistic functions ported
|
120
|
+
from a few excellent Perl modules:
|
121
|
+
|
122
|
+
Lingua::EN::Inflect
|
123
|
+
Lingua::Conjunction
|
124
|
+
Lingua::EN::Infinitive
|
125
|
+
|
126
|
+
See the lib/linguistics/en.rb file for specific attributions.
|
127
|
+
|
128
|
+
New with version 0.02: integration with the Ruby WordNet® and LinkParser
|
129
|
+
modules (which must be installed separately).
|
130
|
+
|
131
|
+
|
132
|
+
== To Do
|
133
|
+
|
134
|
+
* I am planning on improving the results from the infinitive functions, which
|
135
|
+
currently return useful results only part of the time. Investigations into
|
136
|
+
additional stemming functions and some other strategies are ongoing.
|
137
|
+
|
138
|
+
* Martin Chase <stillflame at FaerieMUD dot org> is working on an integration
|
139
|
+
module for his excellent work on a Ruby interface to the CMU Link Grammar
|
140
|
+
(an English-sentence parser). This will make writing fairly accurate natural
|
141
|
+
language parsers in Ruby much easier.
|
142
|
+
|
143
|
+
* Suggestions (and patches) for any of these items or additional features are
|
144
|
+
welcomed.
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
== Legal
|
149
|
+
|
150
|
+
This module is Open Source Software which is Copyright (c) 2003 by The
|
151
|
+
FaerieMUD Consortium. All rights reserved.
|
152
|
+
|
153
|
+
You may use, modify, and/or redistribute this software under the terms of the
|
154
|
+
Perl Artistic License, a copy of which should have been included in this
|
155
|
+
distribution (See the file Artistic). If it was not, a copy of it may be
|
156
|
+
obtained from http://language.perl.com/misc/Artistic.html or
|
157
|
+
http://www.faeriemud.org/artistic.html.
|
158
|
+
|
159
|
+
THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
160
|
+
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
161
|
+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
162
|
+
|
163
|
+
|
164
|
+
$Id$
|
165
|
+
|
166
|
+
|
data/README.english
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
|
2
|
+
= English Ruby Linguistics Module - Synopsis
|
3
|
+
|
4
|
+
This is an overview of the functionality currently in the English functions of
|
5
|
+
the Ruby Linguistics module as of version 0.02:
|
6
|
+
|
7
|
+
|
8
|
+
== Pluralization
|
9
|
+
|
10
|
+
require 'linguistics'
|
11
|
+
Linguistics::use( :en ) # extends Array, String, and Numeric
|
12
|
+
|
13
|
+
"box".en.plural
|
14
|
+
# => "boxes"
|
15
|
+
|
16
|
+
"mouse".en.plural
|
17
|
+
# => "mice"
|
18
|
+
|
19
|
+
"ruby".en.plural
|
20
|
+
# => "rubies"
|
21
|
+
|
22
|
+
|
23
|
+
== Indefinite Articles
|
24
|
+
|
25
|
+
"book".en.a
|
26
|
+
# => "a book"
|
27
|
+
|
28
|
+
"article".en.a
|
29
|
+
# => "an article"
|
30
|
+
|
31
|
+
|
32
|
+
== Present Participles
|
33
|
+
|
34
|
+
"runs".en.present_participle
|
35
|
+
# => "running"
|
36
|
+
|
37
|
+
"eats".en.present_participle
|
38
|
+
# => "eating"
|
39
|
+
|
40
|
+
"spies".en.present_participle
|
41
|
+
# => "spying"
|
42
|
+
|
43
|
+
|
44
|
+
== Ordinal Numbers
|
45
|
+
|
46
|
+
5.en.ordinal
|
47
|
+
# => "5th"
|
48
|
+
|
49
|
+
2004.en.ordinal
|
50
|
+
# => "2004th"
|
51
|
+
|
52
|
+
|
53
|
+
== Numbers to Words
|
54
|
+
|
55
|
+
5.en.numwords
|
56
|
+
# => "five"
|
57
|
+
|
58
|
+
2004.en.numwords
|
59
|
+
# => "two thousand and four"
|
60
|
+
|
61
|
+
2385762345876.en.numwords
|
62
|
+
# => "two trillion, three hundred and eighty-five billion,
|
63
|
+
seven hundred and sixty-two million, three hundred and
|
64
|
+
forty-five thousand, eight hundred and seventy-six"
|
65
|
+
|
66
|
+
|
67
|
+
== Quantification
|
68
|
+
|
69
|
+
"cow".en.quantify( 5 )
|
70
|
+
# => "several cows"
|
71
|
+
|
72
|
+
"cow".en.quantify( 1005 )
|
73
|
+
# => "thousands of cows"
|
74
|
+
|
75
|
+
"cow".en.quantify( 20_432_123_000_000 )
|
76
|
+
# => "tens of trillions of cows"
|
77
|
+
|
78
|
+
|
79
|
+
== Conjunctions
|
80
|
+
|
81
|
+
animals = %w{dog cow ox chicken goose goat cow dog rooster llama
|
82
|
+
pig goat dog cat cat dog cow goat goose goose ox alpaca}
|
83
|
+
puts "The farm has: " + animals.en.conjunction
|
84
|
+
|
85
|
+
# => The farm has: four dogs, three cows, three geese, three goats,
|
86
|
+
two oxen, two cats, a chicken, a rooster, a llama, a pig,
|
87
|
+
and an alpaca
|
88
|
+
|
89
|
+
Note that 'goose' and 'ox' are both correctly pluralized, and the correct
|
90
|
+
indefinite article 'an' has been used for 'alpaca'.
|
91
|
+
|
92
|
+
You can also use the generalization function of the #quantify method to give
|
93
|
+
general descriptions of object lists instead of literal counts:
|
94
|
+
|
95
|
+
allobjs = []
|
96
|
+
ObjectSpace::each_object {|obj| allobjs << obj.class.name}
|
97
|
+
|
98
|
+
puts "The current Ruby objectspace contains: " +
|
99
|
+
allobjs.en.conjunction( :generalize => true )
|
100
|
+
|
101
|
+
which will print something like:
|
102
|
+
|
103
|
+
The current Ruby objectspace contains: thousands of Strings,
|
104
|
+
thousands of Arrays, hundreds of Hashes, hundreds of
|
105
|
+
Classes, many Regexps, a number of Ranges, a number of
|
106
|
+
Modules, several Floats, several Procs, several MatchDatas,
|
107
|
+
several Objects, several IOS, several Files, a Binding, a
|
108
|
+
NoMemoryError, a SystemStackError, a fatal, a ThreadGroup,
|
109
|
+
and a Thread
|
110
|
+
|
111
|
+
|
112
|
+
== Infinitives
|
113
|
+
|
114
|
+
New in version 0.02:
|
115
|
+
|
116
|
+
"leaving".en.infinitive
|
117
|
+
# => "leave"
|
118
|
+
|
119
|
+
"left".en.infinitive
|
120
|
+
# => "leave"
|
121
|
+
|
122
|
+
"leaving".en.infinitive.suffix
|
123
|
+
# => "ing"
|
124
|
+
|
125
|
+
|
126
|
+
== WordNet® Integration
|
127
|
+
|
128
|
+
Also new in version 0.02, if you have the Ruby-WordNet module installed, you can
|
129
|
+
look up WordNet synsets using the Linguistics interface:
|
130
|
+
|
131
|
+
# Test to be sure the WordNet module loaded okay.
|
132
|
+
Linguistics::EN.has_wordnet?
|
133
|
+
# => true
|
134
|
+
|
135
|
+
# Fetch the default synset for the word "balance"
|
136
|
+
"balance".en.synset
|
137
|
+
# => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
|
138
|
+
(derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
|
139
|
+
|
140
|
+
# Fetch the synset for the first verb sense of "balance"
|
141
|
+
"balance".en.synset( :verb )
|
142
|
+
# => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
|
143
|
+
(verb): "bring into balance or equilibrium; "She has to balance work and her
|
144
|
+
domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
|
145
|
+
verbGroups: 2, hypernyms: 1, hyponyms: 5)>
|
146
|
+
|
147
|
+
# Fetch the second noun sense
|
148
|
+
"balance".en.synset( 2, :noun )
|
149
|
+
# => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
|
150
|
+
on pull of gravity" (hypernyms: 1, hyponyms: 5)>
|
151
|
+
|
152
|
+
# Fetch the second noun sense's hypernyms (more-general words, like a superclass)
|
153
|
+
"balance".en.synset( 2, :noun ).hypernyms
|
154
|
+
# => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
155
|
+
instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
156
|
+
hyponyms: 2)>]
|
157
|
+
|
158
|
+
# A simpler way of doing the same thing:
|
159
|
+
"balance".en.hypernyms( 2, :noun )
|
160
|
+
# => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
161
|
+
instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
162
|
+
hyponyms: 2)>]
|
163
|
+
|
164
|
+
# Fetch the first hypernym's hypernyms
|
165
|
+
"balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
|
166
|
+
# => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
|
167
|
+
measuring device (noun): "instrument that shows the extent or amount or quantity
|
168
|
+
or degree of something" (hypernyms: 1, hyponyms: 83)>]
|
169
|
+
|
170
|
+
# Find the synset to which both the second noun sense of "balance" and the
|
171
|
+
# default sense of "shovel" belong.
|
172
|
+
("balance".en.synset( 2, :noun ) | "shovel".en.synset)
|
173
|
+
# => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
|
174
|
+
artifact (or system of artifacts) that is instrumental in accomplishing some
|
175
|
+
end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
|
176
|
+
|
177
|
+
# Fetch just the words for the other kinds of "instruments"
|
178
|
+
"instrument".en.hyponyms.collect {|synset| synset.words}.flatten
|
179
|
+
# => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
|
180
|
+
"extractor", "instrument of execution", "instrument of punishment", "measuring
|
181
|
+
instrument", "measuring system", "measuring device", "medical instrument",
|
182
|
+
"navigational instrument", "optical instrument", "plotter", "scientific
|
183
|
+
instrument", "sonograph", "surveying instrument", "surveyor's instrument",
|
184
|
+
"tracer", "weapon", "arm", "weapon system", "whip"]
|
185
|
+
|
186
|
+
There are many more WordNet methods supported — too many to list here. See the
|
187
|
+
documentation for the complete list.
|
188
|
+
|
189
|
+
|
190
|
+
== LinkParser Integration
|
191
|
+
|
192
|
+
Another new feature in version 0.02 is integration with the Ruby version of the
|
193
|
+
CMU Link Grammar Parser by Martin Chase. If you have the LinkParser module
|
194
|
+
installed, you can create linkages from English sentences that let you query for
|
195
|
+
parts of speech:
|
196
|
+
|
197
|
+
# Test to see whether or not the link parser is loaded.
|
198
|
+
Linguistics::EN.has_link_parser?
|
199
|
+
# => true
|
200
|
+
|
201
|
+
# Diagram the first linkage for a test sentence
|
202
|
+
puts "he is a big dog".en.sentence.linkages.first.to_s
|
203
|
+
+---O*---+
|
204
|
+
| +--Ds--+
|
205
|
+
+Ss+ | +-A-+
|
206
|
+
| | | | |
|
207
|
+
he is a big dog
|
208
|
+
|
209
|
+
# Find the verb in the sentence
|
210
|
+
"he is a big dog".en.sentence.verb.to_s
|
211
|
+
# => "is"
|
212
|
+
|
213
|
+
# Combined infinitive + LinkParser: Find the infinitive form of the verb of the
|
214
|
+
# given sentence.
|
215
|
+
"he is a big dog".en.sentence.verb.infinitive
|
216
|
+
# => "be"
|
217
|
+
|
218
|
+
# Find the direct object of the sentence
|
219
|
+
"he is a big dog".en.sentence.object.to_s
|
220
|
+
# => "dog"
|
221
|
+
|
222
|
+
# Look at the raw LinkParser::Word for the direct object of the sentence.
|
223
|
+
"he is a big dog".en.sentence.object
|
224
|
+
# => #<LinkParser::Word:0x403da0a0 @definition=[[{@A-}, Ds-, {@M+}, J-], [{@A-},
|
225
|
+
Ds-, {@M+}, Os-], [{@A-}, Ds-, {@M+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-, {@M+},
|
226
|
+
Ss+, R-], [{@A-}, Ds-, {@M+}, SIs-], [{@A-}, Ds-, {R+}, {Bs+}, J-], [{@A-}, Ds-,
|
227
|
+
{R+}, {Bs+}, Os-], [{@A-}, Ds-, {R+}, {Bs+}, Ss+, {@CO-}, {C-}], [{@A-}, Ds-,
|
228
|
+
{R+}, {Bs+}, Ss+, R-], [{@A-}, Ds-, {R+}, {Bs+}, SIs-]], @right=[], @suffix="",
|
229
|
+
@left=[#<LinkParser::Connection:0x403da028 @rword=#<LinkParser::Word:0x403da0a0
|
230
|
+
...>, @lword=#<LinkParser::Word:0x403da0b4 @definition=[[Ss-, O+, {@MV+}], [Ss-,
|
231
|
+
B-, {@MV+}], [Ss-, P+], [Ss-, AF-], [RS-, Bs-, O+, {@MV+}], [RS-, Bs-, B-,
|
232
|
+
{@MV+}], [RS-, Bs-, P+], [RS-, Bs-, AF-], [{Q-}, SIs+, O+, {@MV+}], [{Q-}, SIs+,
|
233
|
+
B-, {@MV+}], [{Q-}, SIs+, P+], [{Q-}, SIs+, AF-]],
|
234
|
+
@right=[#<LinkParser::Connection:0x403da028 ...>], @suffix="", @left=[],
|
235
|
+
@name="is", @position=1>, @subName="*", @name="O", @length=3>], @name="dog",
|
236
|
+
@position=4>
|
237
|
+
|
238
|
+
# Combine WordNet + LinkParser to find the definition of the direct object of
|
239
|
+
# the sentence
|
240
|
+
"he is a big dog".en.sentence.object.gloss
|
241
|
+
# => "a member of the genus Canis (probably descended from the common wolf) that
|
242
|
+
has been domesticated by man since prehistoric times; occurs in many breeds;
|
243
|
+
\"the dog barked all night\""
|
244
|
+
|
245
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,338 @@
|
|
1
|
+
#!rake
|
2
|
+
#
|
3
|
+
# Linguistics rakefile
|
4
|
+
#
|
5
|
+
# Based on various other Rakefiles, especially one by Ben Bleything
|
6
|
+
#
|
7
|
+
# Copyright (c) 2007-2009 The FaerieMUD Consortium
|
8
|
+
#
|
9
|
+
# Authors:
|
10
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
11
|
+
#
|
12
|
+
|
13
|
+
BEGIN {
|
14
|
+
require 'pathname'
|
15
|
+
basedir = Pathname.new( __FILE__ ).dirname
|
16
|
+
|
17
|
+
libdir = basedir + "lib"
|
18
|
+
extdir = basedir + "ext"
|
19
|
+
|
20
|
+
$LOAD_PATH.unshift( libdir.to_s ) unless $LOAD_PATH.include?( libdir.to_s )
|
21
|
+
$LOAD_PATH.unshift( extdir.to_s ) unless $LOAD_PATH.include?( extdir.to_s )
|
22
|
+
}
|
23
|
+
|
24
|
+
begin
|
25
|
+
require 'readline'
|
26
|
+
include Readline
|
27
|
+
rescue LoadError
|
28
|
+
# Fall back to a plain prompt
|
29
|
+
def readline( text )
|
30
|
+
$stderr.print( text.chomp )
|
31
|
+
return $stdin.gets
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
require 'rbconfig'
|
36
|
+
require 'rake'
|
37
|
+
require 'rake/testtask'
|
38
|
+
require 'rake/packagetask'
|
39
|
+
require 'rake/clean'
|
40
|
+
# require 'rake/191_compat.rb'
|
41
|
+
|
42
|
+
$dryrun = false
|
43
|
+
|
44
|
+
### Config constants
|
45
|
+
BASEDIR = Pathname.new( __FILE__ ).dirname.relative_path_from( Pathname.getwd )
|
46
|
+
BINDIR = BASEDIR + 'bin'
|
47
|
+
LIBDIR = BASEDIR + 'lib'
|
48
|
+
EXTDIR = BASEDIR + 'ext'
|
49
|
+
DOCSDIR = BASEDIR + 'docs'
|
50
|
+
PKGDIR = BASEDIR + 'pkg'
|
51
|
+
DATADIR = BASEDIR + 'data'
|
52
|
+
|
53
|
+
MANUALDIR = DOCSDIR + 'manual'
|
54
|
+
|
55
|
+
PROJECT_NAME = 'Linguistics'
|
56
|
+
PKG_NAME = PROJECT_NAME.downcase
|
57
|
+
PKG_SUMMARY = 'a framework for building linguistic utilities for Ruby objects'
|
58
|
+
|
59
|
+
# Cruisecontrol stuff
|
60
|
+
CC_BUILD_LABEL = ENV['CC_BUILD_LABEL']
|
61
|
+
CC_BUILD_ARTIFACTS = ENV['CC_BUILD_ARTIFACTS'] || 'artifacts'
|
62
|
+
|
63
|
+
VERSION_FILE = LIBDIR + 'linguistics.rb'
|
64
|
+
if VERSION_FILE.exist? && buildrev = ENV['CC_BUILD_LABEL']
|
65
|
+
PKG_VERSION = VERSION_FILE.read[ /VERSION\s*=\s*['"](\d+\.\d+\.\d+)['"]/, 1 ] + '.' + buildrev
|
66
|
+
elsif VERSION_FILE.exist?
|
67
|
+
PKG_VERSION = VERSION_FILE.read[ /VERSION\s*=\s*['"](\d+\.\d+\.\d+)['"]/, 1 ]
|
68
|
+
else
|
69
|
+
PKG_VERSION = '0.0.0'
|
70
|
+
end
|
71
|
+
|
72
|
+
PKG_FILE_NAME = "#{PKG_NAME.downcase}-#{PKG_VERSION}"
|
73
|
+
GEM_FILE_NAME = "#{PKG_FILE_NAME}.gem"
|
74
|
+
|
75
|
+
# Universal VCS constants
|
76
|
+
DEFAULT_EDITOR = 'vi'
|
77
|
+
COMMIT_MSG_FILE = 'commit-msg.txt'
|
78
|
+
FILE_INDENT = " " * 12
|
79
|
+
LOG_INDENT = " " * 3
|
80
|
+
|
81
|
+
EXTCONF = EXTDIR + 'extconf.rb'
|
82
|
+
|
83
|
+
ARTIFACTS_DIR = Pathname.new( CC_BUILD_ARTIFACTS )
|
84
|
+
|
85
|
+
TEXT_FILES = Rake::FileList.new( %w[Rakefile ChangeLog README LICENSE] )
|
86
|
+
BIN_FILES = Rake::FileList.new( "#{BINDIR}/*" )
|
87
|
+
LIB_FILES = Rake::FileList.new( "#{LIBDIR}/**/*.rb" )
|
88
|
+
EXT_FILES = Rake::FileList.new( "#{EXTDIR}/**/*.{c,h,rb}" )
|
89
|
+
DATA_FILES = Rake::FileList.new( "#{DATADIR}/**/*" )
|
90
|
+
|
91
|
+
SPECDIR = BASEDIR + 'spec'
|
92
|
+
SPECLIBDIR = SPECDIR + 'lib'
|
93
|
+
SPEC_FILES = Rake::FileList.new( "#{SPECDIR}/**/*_spec.rb", "#{SPECLIBDIR}/**/*.rb" )
|
94
|
+
|
95
|
+
TESTDIR = BASEDIR + 'tests'
|
96
|
+
TEST_FILES = Rake::FileList.new( "#{TESTDIR}/**/*.tests.rb" )
|
97
|
+
|
98
|
+
RAKE_TASKDIR = BASEDIR + 'rake'
|
99
|
+
RAKE_TASKLIBS = Rake::FileList.new( "#{RAKE_TASKDIR}/*.rb" )
|
100
|
+
PKG_TASKLIBS = Rake::FileList.new( "#{RAKE_TASKDIR}/{191_compat,helpers,packaging,rdoc,testing}.rb" )
|
101
|
+
PKG_TASKLIBS.include( "#{RAKE_TASKDIR}/manual.rb" ) if MANUALDIR.exist?
|
102
|
+
|
103
|
+
RAKE_TASKLIBS_URL = 'http://repo.deveiate.org/rake-tasklibs'
|
104
|
+
|
105
|
+
LOCAL_RAKEFILE = BASEDIR + 'Rakefile.local'
|
106
|
+
|
107
|
+
EXTRA_PKGFILES = Rake::FileList.new
|
108
|
+
EXTRA_PKGFILES.include( "#{BASEDIR}/examples/*.rb" )
|
109
|
+
EXTRA_PKGFILES.include( "#{BASEDIR}/README.english" )
|
110
|
+
|
111
|
+
RELEASE_FILES = TEXT_FILES +
|
112
|
+
SPEC_FILES +
|
113
|
+
TEST_FILES +
|
114
|
+
BIN_FILES +
|
115
|
+
LIB_FILES +
|
116
|
+
EXT_FILES +
|
117
|
+
DATA_FILES +
|
118
|
+
RAKE_TASKLIBS +
|
119
|
+
EXTRA_PKGFILES
|
120
|
+
|
121
|
+
|
122
|
+
RELEASE_FILES << LOCAL_RAKEFILE.to_s if LOCAL_RAKEFILE.exist?
|
123
|
+
|
124
|
+
COVERAGE_MINIMUM = ENV['COVERAGE_MINIMUM'] ? Float( ENV['COVERAGE_MINIMUM'] ) : 85.0
|
125
|
+
RCOV_EXCLUDES = 'spec,tests,/Library/Ruby,/var/lib,/usr/local/lib'
|
126
|
+
RCOV_OPTS = [
|
127
|
+
'--exclude', RCOV_EXCLUDES,
|
128
|
+
'--xrefs',
|
129
|
+
'--save',
|
130
|
+
'--callsites',
|
131
|
+
#'--aggregate', 'coverage.data' # <- doesn't work as of 0.8.1.2.0
|
132
|
+
]
|
133
|
+
|
134
|
+
|
135
|
+
### Load some task libraries that need to be loaded early
|
136
|
+
if !RAKE_TASKDIR.exist?
|
137
|
+
$stderr.puts "It seems you don't have the build task directory. Shall I fetch it "
|
138
|
+
ans = readline( "for you? [y]" )
|
139
|
+
ans = 'y' if !ans.nil? && ans.empty?
|
140
|
+
|
141
|
+
if ans =~ /^y/i
|
142
|
+
$stderr.puts "Okay, fetching #{RAKE_TASKLIBS_URL} into #{RAKE_TASKDIR}..."
|
143
|
+
system 'hg', 'clone', RAKE_TASKLIBS_URL, RAKE_TASKDIR
|
144
|
+
if ! $?.success?
|
145
|
+
fail "Damn. That didn't work. Giving up; maybe try manually fetching?"
|
146
|
+
end
|
147
|
+
else
|
148
|
+
$stderr.puts "Then I'm afraid I can't continue. Best of luck."
|
149
|
+
fail "Rake tasklibs not present."
|
150
|
+
end
|
151
|
+
|
152
|
+
RAKE_TASKLIBS.include( "#{RAKE_TASKDIR}/*.rb" )
|
153
|
+
end
|
154
|
+
|
155
|
+
require RAKE_TASKDIR + 'helpers.rb'
|
156
|
+
|
157
|
+
# Define some constants that depend on the Mercurial ('hg') executable
|
158
|
+
if hg = which( 'hg' )
|
159
|
+
id = IO.read('|-') or exec hg.to_s, 'id', '-n'
|
160
|
+
PKG_BUILD = id.chomp[ /^[[:xdigit:]]+/ ]
|
161
|
+
else
|
162
|
+
PKG_BUILD = 0
|
163
|
+
end
|
164
|
+
SNAPSHOT_PKG_NAME = "#{PKG_FILE_NAME}.#{PKG_BUILD}"
|
165
|
+
SNAPSHOT_GEM_NAME = "#{SNAPSHOT_PKG_NAME}.gem"
|
166
|
+
|
167
|
+
# Documentation constants
|
168
|
+
RDOCDIR = DOCSDIR + 'api'
|
169
|
+
RDOC_OPTIONS = [
|
170
|
+
'-w', '4',
|
171
|
+
'-HN',
|
172
|
+
'-i', '.',
|
173
|
+
'-m', 'README',
|
174
|
+
'-t', PKG_NAME,
|
175
|
+
'-W', 'http://deveiate.org/projects/Linguistics/browser/'
|
176
|
+
]
|
177
|
+
|
178
|
+
# Release constants
|
179
|
+
SMTP_HOST = 'mail.faeriemud.org'
|
180
|
+
SMTP_PORT = 465 # SMTP + SSL
|
181
|
+
|
182
|
+
# Project constants
|
183
|
+
PROJECT_HOST = 'deveiate'
|
184
|
+
PROJECT_PUBDIR = '/usr/local/www/public/code'
|
185
|
+
PROJECT_DOCDIR = "#{PROJECT_PUBDIR}/#{PKG_NAME}"
|
186
|
+
PROJECT_SCPPUBURL = "#{PROJECT_HOST}:#{PROJECT_PUBDIR}"
|
187
|
+
PROJECT_SCPDOCURL = "#{PROJECT_HOST}:#{PROJECT_DOCDIR}"
|
188
|
+
|
189
|
+
# Rubyforge stuff
|
190
|
+
RUBYFORGE_GROUP = 'deveiate'
|
191
|
+
RUBYFORGE_PROJECT = 'linguistics'
|
192
|
+
|
193
|
+
# Gem dependencies: gemname => version
|
194
|
+
DEPENDENCIES = {
|
195
|
+
}
|
196
|
+
|
197
|
+
# Developer Gem dependencies: gemname => version
|
198
|
+
DEVELOPMENT_DEPENDENCIES = {
|
199
|
+
'rake' => '>= 0.8.7',
|
200
|
+
'rcodetools' => '>= 0.7.0.0',
|
201
|
+
'rcov' => '>= 0.8.1.2.0',
|
202
|
+
'rdoc' => '>= 2.4.3',
|
203
|
+
'RedCloth' => '>= 4.0.3',
|
204
|
+
'rspec' => '>= 1.2.6',
|
205
|
+
'rubyforge' => '>= 0',
|
206
|
+
'termios' => '>= 0',
|
207
|
+
'text-format' => '>= 1.0.0',
|
208
|
+
'tmail' => '>= 1.2.3.1',
|
209
|
+
'diff-lcs' => '>= 1.1.2',
|
210
|
+
'wordnet' => '>=0.0.5',
|
211
|
+
'linkparser' => '>=1.0.3',
|
212
|
+
}
|
213
|
+
|
214
|
+
# Non-gem requirements: packagename => version
|
215
|
+
REQUIREMENTS = {
|
216
|
+
}
|
217
|
+
|
218
|
+
# RubyGem specification
|
219
|
+
GEMSPEC = Gem::Specification.new do |gem|
|
220
|
+
gem.name = PKG_NAME.downcase
|
221
|
+
gem.version = PKG_VERSION
|
222
|
+
|
223
|
+
gem.summary = PKG_SUMMARY
|
224
|
+
gem.description = [
|
225
|
+
"in any language. It includes a generic language-independant front end, a",
|
226
|
+
"module for mapping language codes into language names, and a module which",
|
227
|
+
"contains various English-language utilities.",
|
228
|
+
].join( "\n" )
|
229
|
+
|
230
|
+
gem.authors = "Michael Granger"
|
231
|
+
gem.email = ["ged@FaerieMUD.org"]
|
232
|
+
gem.homepage = 'http://deveiate.org/projects/Linguistics/'
|
233
|
+
|
234
|
+
# Apparently this isn't actually the 'project'?
|
235
|
+
gem.rubyforge_project = RUBYFORGE_GROUP
|
236
|
+
|
237
|
+
gem.has_rdoc = true
|
238
|
+
gem.rdoc_options = RDOC_OPTIONS
|
239
|
+
gem.extra_rdoc_files = %w[ChangeLog README LICENSE]
|
240
|
+
|
241
|
+
gem.bindir = BINDIR.relative_path_from(BASEDIR).to_s
|
242
|
+
gem.executables = BIN_FILES.select {|pn| File.executable?(pn) }.
|
243
|
+
collect {|pn| File.basename(pn) }
|
244
|
+
gem.require_paths << EXTDIR.relative_path_from( BASEDIR ).to_s if EXTDIR.exist?
|
245
|
+
|
246
|
+
if EXTCONF.exist?
|
247
|
+
gem.extensions << EXTCONF.relative_path_from( BASEDIR ).to_s
|
248
|
+
end
|
249
|
+
|
250
|
+
gem.files = RELEASE_FILES
|
251
|
+
gem.test_files = SPEC_FILES
|
252
|
+
|
253
|
+
DEPENDENCIES.each do |name, version|
|
254
|
+
version = '>= 0' if version.length.zero?
|
255
|
+
gem.add_runtime_dependency( name, version )
|
256
|
+
end
|
257
|
+
|
258
|
+
REQUIREMENTS.each do |name, version|
|
259
|
+
gem.requirements << [ name, version ].compact.join(' ')
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
$trace = Rake.application.options.trace ? true : false
|
264
|
+
$dryrun = Rake.application.options.dryrun ? true : false
|
265
|
+
$include_dev_dependencies = false
|
266
|
+
|
267
|
+
# Load any remaining task libraries
|
268
|
+
RAKE_TASKLIBS.each do |tasklib|
|
269
|
+
next if tasklib.to_s =~ %r{/helpers\.rb$}
|
270
|
+
begin
|
271
|
+
trace " loading tasklib %s" % [ tasklib ]
|
272
|
+
import tasklib
|
273
|
+
rescue ScriptError => err
|
274
|
+
fail "Task library '%s' failed to load: %s: %s" %
|
275
|
+
[ tasklib, err.class.name, err.message ]
|
276
|
+
trace "Backtrace: \n " + err.backtrace.join( "\n " )
|
277
|
+
rescue => err
|
278
|
+
log "Task library '%s' failed to load: %s: %s. Some tasks may not be available." %
|
279
|
+
[ tasklib, err.class.name, err.message ]
|
280
|
+
trace "Backtrace: \n " + err.backtrace.join( "\n " )
|
281
|
+
end
|
282
|
+
end
|
283
|
+
|
284
|
+
# Load any project-specific rules defined in 'Rakefile.local' if it exists
|
285
|
+
import LOCAL_RAKEFILE if LOCAL_RAKEFILE.exist?
|
286
|
+
|
287
|
+
|
288
|
+
#####################################################################
|
289
|
+
### T A S K S
|
290
|
+
#####################################################################
|
291
|
+
|
292
|
+
### Default task
|
293
|
+
task :default => [:clean, :local, :spec, :rdoc, :package]
|
294
|
+
|
295
|
+
### Task the local Rakefile can append to -- no-op by default
|
296
|
+
task :local
|
297
|
+
|
298
|
+
### Task: clean
|
299
|
+
CLEAN.include 'coverage'
|
300
|
+
CLOBBER.include 'artifacts', 'coverage.info', PKGDIR
|
301
|
+
|
302
|
+
### Task: changelog
|
303
|
+
file 'ChangeLog' do |task|
|
304
|
+
log "Updating #{task.name}"
|
305
|
+
|
306
|
+
changelog = make_changelog()
|
307
|
+
File.open( task.name, 'w' ) do |fh|
|
308
|
+
fh.print( changelog )
|
309
|
+
end
|
310
|
+
end
|
311
|
+
|
312
|
+
|
313
|
+
### Task: cruise (Cruisecontrol task)
|
314
|
+
desc "Cruisecontrol build"
|
315
|
+
task :cruise => [:clean, 'spec:quiet', :package] do |task|
|
316
|
+
raise "Artifacts dir not set." if ARTIFACTS_DIR.to_s.empty?
|
317
|
+
artifact_dir = ARTIFACTS_DIR.cleanpath + (CC_BUILD_LABEL || Time.now.strftime('%Y%m%d-%T'))
|
318
|
+
artifact_dir.mkpath
|
319
|
+
|
320
|
+
coverage = BASEDIR + 'coverage'
|
321
|
+
if coverage.exist? && coverage.directory?
|
322
|
+
$stderr.puts "Copying coverage stats..."
|
323
|
+
FileUtils.cp_r( 'coverage', artifact_dir )
|
324
|
+
end
|
325
|
+
|
326
|
+
$stderr.puts "Copying packages..."
|
327
|
+
FileUtils.cp_r( FileList['pkg/*'].to_a, artifact_dir )
|
328
|
+
end
|
329
|
+
|
330
|
+
|
331
|
+
desc "Update the build system to the latest version"
|
332
|
+
task :update_build do
|
333
|
+
log "Updating the build system"
|
334
|
+
run 'hg', '-R', RAKE_TASKDIR, 'pull', '-u'
|
335
|
+
log "Updating the Rakefile"
|
336
|
+
sh 'rake', '-f', RAKE_TASKDIR + 'Metarakefile'
|
337
|
+
end
|
338
|
+
|