words-wordnet 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.markdown +169 -0
- data/Rakefile +54 -0
- data/VERSION +1 -0
- data/bin/build_wordnet +177 -0
- data/examples.rb +55 -0
- data/lib/evocations.rb +81 -0
- data/lib/homographs.rb +100 -0
- data/lib/relation.rb +90 -0
- data/lib/synset.rb +201 -0
- data/lib/wordnet_connectors/pure_wordnet_connection.rb +224 -0
- data/lib/wordnet_connectors/tokyo_wordnet_connection.rb +141 -0
- data/lib/words.rb +172 -0
- data/spec/words_spec.rb +151 -0
- data/words.gemspec +57 -0
- metadata +95 -0
data/lib/evocations.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# local includes
|
4
|
+
require File.join(File.dirname(__FILE__), 'synset.rb')
|
5
|
+
|
6
|
+
module Words

  # The set of evocations recorded for a single source synset.
  #
  # The backing construct is read through three keys -- "means", "medians" and
  # "relations" -- each holding a '|'-separated string. The three resulting
  # lists are indexed in parallel: entry +i+ of each list describes the same
  # evocation.
  class Evocations

    # evocation_construct:: record answering to [] for the three keys above
    # source_synset::       synset the evocations originate from; it is asked
    #                       for #homographs whenever a destination is built
    # wordnet_connection::  connection passed on to every Synset created here
    def initialize(evocation_construct, source_synset, wordnet_connection)
      @evocation_construct = evocation_construct
      @source = source_synset
      @wordnet_connection = wordnet_connection
    end

    # The "means" field split on '|' (memoised). Entries stay strings.
    def means
      @means ||= @evocation_construct["means"].split('|')
    end

    # The "medians" field split on '|' (memoised). Entries stay strings.
    def medians
      @medians ||= @evocation_construct["medians"].split('|')
    end

    # Number of evocations, taken from the length of #means.
    def size
      means.size
    end

    # The evocation at index 0 (see #[]).
    def first
      self[0]
    end

    # The evocation at index size - 1 (see #[]).
    def last
      final_index = size - 1
      self[final_index]
    end

    # Builds a hash describing the evocation at +index+, with the keys
    # :destination (a freshly built Synset), :mean and :median.
    def [] (index)
      target = Synset.new(destination_ids[index], @wordnet_connection, @source.homographs)
      { :destination => target, :mean => means[index], :median => medians[index] }
    end

    # Destination synsets, optionally restricted to one part of speech.
    # +pos+ is interpreted exactly as in #destination_ids.
    def destinations(pos = :all)
      destination_ids(pos).map do |id|
        Synset.new(id, @wordnet_connection, @source.homographs)
      end
    end

    # Destination synset ids, optionally restricted to one part of speech.
    #
    # +pos+ may be a key of Homographs::SYMBOL_TO_POS (or a string naming one)
    # or a key of Homographs::POS_TO_SYMBOL. Any other value, including the
    # default :all, returns every id. Ids are matched on their first character.
    def destination_ids(pos = :all)
      @destination_ids ||= @evocation_construct["relations"].split('|')

      pos_symbol = pos.to_sym
      pos_letter =
        if Homographs::SYMBOL_TO_POS.include?(pos_symbol)
          Homographs::SYMBOL_TO_POS[pos_symbol]
        elsif Homographs::POS_TO_SYMBOL.include?(pos.to_s)
          pos.to_s
        end

      # No recognised part of speech: hand back the unfiltered list.
      return @destination_ids if pos_letter.nil?

      @destination_ids.select { |id| id[0, 1] == pos_letter }
    end

    # Short human readable summary.
    def to_s
      "#{size} evocations from the #{@source}"
    end

  end

end
|
data/lib/homographs.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# local includes
|
4
|
+
require File.join(File.dirname(__FILE__), 'synset.rb')
|
5
|
+
|
6
|
+
module Words

  # Wraps the raw record stored for one lemma and exposes the synsets (senses)
  # attached to it, optionally grouped by part of speech.
  #
  # The raw record is read through the keys "lemma", "synset_ids" and
  # "tagsense_counts". The latter two hold '|'-separated strings, and the first
  # character of every entry is a part-of-speech letter (see POS_TO_SYMBOL).
  class Homographs

    # Part-of-speech letter => symbol, and the reverse mapping.
    POS_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb}
    SYMBOL_TO_POS = POS_TO_SYMBOL.invert

    # Convenience methods for per-part-of-speech access, e.g. for :noun:
    # nouns?, nouns, noun_count and noun_ids.
    #
    # They are defined once, when the class is loaded. Previously they were
    # re-defined inside #initialize on every instantiation, which repeated the
    # same work for each new object and left the helpers undefined until the
    # first instance had been built.
    SYMBOL_TO_POS.keys.each do |pos|
      define_method("#{pos}s?") { size(pos) > 0 }
      define_method("#{pos}s") { synsets(pos) }
      define_method("#{pos}_count") { size(pos) }
      define_method("#{pos}_ids") { synset_ids(pos) }
    end

    # raw_homographs::     record answering to [] for the keys listed above
    # wordnet_connection:: connection passed on to every Synset built by #synsets
    def initialize(raw_homographs, wordnet_connection)
      @wordnet_connection = wordnet_connection
      @raw_homographs = raw_homographs
    end

    # Array of single-entry hashes, one per "tagsense_counts" entry, mapping the
    # part-of-speech symbol to the integer that follows the letter (memoised).
    def tagsense_counts
      @tagsense_counts ||= @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } }
    end

    # The lemma with underscores replaced by spaces (memoised).
    def lemma
      @lemma ||= @raw_homographs["lemma"].gsub('_', ' ')
    end

    # Distinct part-of-speech symbols found among the synset ids (memoised).
    def available_pos
      @available_pos ||= synset_ids.map { |synset_id| POS_TO_SYMBOL[synset_id[0,1]] }.uniq
    end

    # "<lemma>, <pos>/<pos>/..." (memoised).
    def to_s
      @to_s ||= [lemma, " " + available_pos.join("/")].join(",")
    end

    # Number of synset ids matching +pos+ (see #synset_ids).
    def size(pos = :all)
      synset_ids(pos).size
    end

    # Synset objects for the ids matching +pos+ (see #synset_ids). A new Synset
    # is built on every call.
    def synsets(pos = :all)
      synset_ids(pos).map { |synset_id| Synset.new synset_id, @wordnet_connection, self }
    end

    # Synset ids, optionally restricted to one part of speech.
    #
    # +pos+ may be a key of SYMBOL_TO_POS (or a string naming one) or a key of
    # POS_TO_SYMBOL. Any other value, including the default :all, returns every
    # id. Ids are matched on their first character.
    def synset_ids(pos = :all)
      @synset_ids ||= @raw_homographs["synset_ids"].split('|')

      case
      when SYMBOL_TO_POS.include?(pos.to_sym)
        @synset_ids.select { |synset_id| synset_id[0,1] == SYMBOL_TO_POS[pos.to_sym] }
      when POS_TO_SYMBOL.include?(pos.to_s)
        @synset_ids.select { |synset_id| synset_id[0,1] == pos.to_s }
      else
        @synset_ids
      end
    end

    # Delegates to the raw record's #inspect.
    def inspect
      @raw_homographs.inspect
    end

    alias word lemma
    alias pos available_pos
    alias senses synsets
    alias sense_ids synset_ids

  end

end
|
data/lib/relation.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# local includes
|
4
|
+
require File.join(File.dirname(__FILE__), 'synset.rb')
|
5
|
+
|
6
|
+
module Words

  # A single pointer from a source synset to a destination synset.
  #
  # A relation is built from a '.'-separated construct with four fields, in
  # this order: pointer symbol, destination synset id (without its
  # part-of-speech prefix), part-of-speech letter, and a four character
  # source/destination field.
  class Relation

    # Pointer symbol => relation type, and the reverse mapping.
    RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
      ";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
      "-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
      "%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
      "\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
    SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert

    # relation_construct:: the '.'-separated string described on the class
    # source_synset::      synset the relation starts from; #words and
    #                      #synset_id are called on it
    # wordnet_connection:: connection used to load the destination synset
    def initialize(relation_construct, source_synset, wordnet_connection)
      @wordnet_connection = wordnet_connection
      @symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
      # Full synset ids carry the part-of-speech letter as a prefix.
      @dest_synset_id = @pos + @dest_synset_id
      # Unknown pointer symbols leave @symbol as nil.
      @symbol = RELATION_TO_SYMBOL[@symbol]
      @source_synset = source_synset
    end

    # True when the source/destination field is "0000", i.e. the relation is
    # between the synsets as a whole rather than between two specific words.
    def is_semantic?
      @source_dest == "0000"
    end

    # The word of the source synset this relation starts from, or nil for a
    # semantic relation. The first two characters of the source/destination
    # field are read as a hexadecimal, 1-based index into the source's words.
    def source_word
      return nil if is_semantic?
      @source_word ||= @source_synset.words[@source_dest[0..1].to_i(16)-1]
    end

    # The word of the destination synset this relation points at, or nil for a
    # semantic relation. The last two characters of the source/destination
    # field are read as a hexadecimal, 1-based index into the destination's
    # words. Loads the destination synset.
    def destination_word
      return nil if is_semantic?
      @destination_word ||= destination.words[@source_dest[2..3].to_i(16)-1]
    end

    # Whether this relation is of the given type.
    #
    # +type+ may be a relation symbol or its name (:hypernym, "hypernym") or a
    # raw pointer symbol ("@"). Anything unrecognised yields false.
    #
    # The pointer-symbol branch used to reference an undefined +pos+ and an
    # undefined POINTER_TO_SYMBOL constant, so every input that was not a
    # relation name raised NameError instead of returning a boolean.
    def relation_type?(type)
      case
      when SYMBOL_TO_RELATION.include?(type.to_sym)
        type.to_sym == @symbol
      when RELATION_TO_SYMBOL.include?(type.to_s)
        # RELATION_TO_SYMBOL is keyed by strings, so look up with to_s.
        RELATION_TO_SYMBOL[type.to_s] == @symbol
      else
        false
      end
    end

    # The relation type symbol (nil when the pointer symbol was not recognised).
    def relation_type
      @symbol
    end

    # The destination synset, loaded on first use and memoised. It is built
    # without homographs (nil is passed).
    def destination
      @destination ||= Synset.new(@dest_synset_id, @wordnet_connection, nil)
    end

    # Human readable description (memoised). Lexical relations also name the two
    # words involved, which loads the destination synset.
    def to_s
      if is_semantic?
        @to_s ||= "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}"
      else
        @to_s ||= "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\""
      end
    end

    # Inspect string of a hash holding the parsed fields.
    def inspect
      { :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
    end

  end

end
|
data/lib/synset.rb
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# local includes
|
4
|
+
require File.join(File.dirname(__FILE__), 'relation.rb')
|
5
|
+
require File.join(File.dirname(__FILE__), 'evocations.rb')
|
6
|
+
|
7
|
+
module Words

  # One synset (sense) loaded through a wordnet connection.
  #
  # The record returned by the connection is read through the keys
  # "synset_id", "synset_type", "words", "lexical_filenum", "gloss" and
  # "relations". "words" and "relations" hold '|'-separated strings.
  class Synset

    # Synset type letter => symbol. (The :adjective_satallite spelling is part
    # of the public interface and is kept as is.)
    SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
    SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
    # Lexical categories, indexed by the integer value of "lexical_filenum".
    NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
      { :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
      { :lex => :adv_all, :description => "all adverbs" },
      { :lex => :noun_Tops, :description => "unique beginner for nouns" },
      { :lex => :noun_act, :description => "nouns denoting acts or actions" },
      { :lex => :noun_animal, :description => "nouns denoting animals" },
      { :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
      { :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
      { :lex => :noun_body, :description => "nouns denoting body parts" },
      { :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
      { :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
      { :lex => :noun_event, :description => "nouns denoting natural events" },
      { :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
      { :lex => :noun_food, :description => "nouns denoting foods and drinks" },
      { :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
      { :lex => :noun_location, :description => "nouns denoting spatial position" },
      { :lex => :noun_motive, :description => "nouns denoting goals" },
      { :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
      { :lex => :noun_person, :description => "nouns denoting people" },
      { :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
      { :lex => :noun_plant, :description => "nouns denoting plants" },
      { :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
      { :lex => :noun_process, :description => "nouns denoting natural processes" },
      { :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
      { :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
      { :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
      { :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
      { :lex => :noun_substance, :description => "nouns denoting substances" },
      { :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
      { :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
      { :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
      { :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
      { :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
      { :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
      { :lex => :verb_consumption, :description => "verbs of eating and drinking" },
      { :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
      { :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
      { :lex => :verb_emotion, :description => "verbs of feeling" },
      { :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
      { :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
      { :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
      { :lex => :verb_social, :description => "verbs of political and social activities and events" },
      { :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
      { :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
      { :lex => :adj_ppl, :description => "participial adjectives" } ]

    # Defines, once per class, the convenience methods for relation type
    # access: for every relation type +x+ an +xs?+ predicate and an +xs+ reader.
    #
    # This runs from #initialize rather than from the class body so that the
    # Relation constant is only resolved when a Synset is actually built, as
    # before. The flag stops the methods from being re-defined on every
    # instantiation, which the previous implementation did.
    def self.define_relation_helpers
      return if @relation_helpers_defined

      Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
        define_method("#{relation_type}s?") { relations(relation_type).size > 0 }
        define_method("#{relation_type}s") { relations(relation_type) }
      end
      @relation_helpers_defined = true
    end
    private_class_method :define_relation_helpers

    # synset_id::          id handed to wordnet_connection.synset
    # wordnet_connection:: connection used for this synset and anything loaded
    #                      from it (relations, evocations)
    # homographs::         the Homographs this synset was reached through;
    #                      Relation#destination passes nil here
    def initialize(synset_id, wordnet_connection, homographs)
      @wordnet_connection = wordnet_connection
      @synset_hash = wordnet_connection.synset(synset_id)
      @homographs = homographs

      self.class.send(:define_relation_helpers)
    end

    # Symbol for the record's "synset_type" letter.
    def synset_type
      SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
    end

    # The words of this synset, underscores replaced by spaces (memoised).
    def words
      @words ||= map_from_words_with_lexical_ids(:word)
    end

    # The lexical ids of this synset's words, in word order (memoised).
    #
    # Uses its own instance variable: it previously shared @words with #words,
    # so whichever of the two was called first determined what both returned.
    def lexical_ids
      @lexical_ids ||= map_from_words_with_lexical_ids(:lexical_id)
    end

    # Number of words in this synset.
    def size
      words.size
    end

    # Array of { :word => ..., :lexical_id => ... } hashes parsed from the
    # "words" field, whose entries have the form "word.lexical_id" (memoised).
    def words_with_lexical_ids
      @words_with_num ||= @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } }
    end

    # The raw "lexical_filenum" value of the record.
    def lexical_filenum
      @synset_hash["lexical_filenum"]
    end

    # The :lex symbol of this synset's NUM_TO_LEX entry.
    def lexical_catagory
      lexical[:lex]
    end

    # Correctly spelled alias; the original name is kept for existing callers.
    alias lexical_category lexical_catagory

    # The :description string of this synset's NUM_TO_LEX entry.
    def lexical_description
      lexical[:description]
    end

    # The NUM_TO_LEX entry selected by lexical_filenum.to_i.
    def lexical
      NUM_TO_LEX[lexical_filenum.to_i]
    end

    # The raw "synset_id" value of the record.
    def synset_id
      @synset_hash["synset_id"]
    end

    # The raw "gloss" value of the record.
    def gloss
      @synset_hash["gloss"]
    end

    # The lemma of the homographs this synset was built with.
    # NOTE(review): raises NoMethodError when the synset was built with nil
    # homographs, as Relation#destination does -- confirm whether nil should be
    # returned instead.
    def lemma
      @homographs.lemma
    end

    # The homographs object given to the constructor (may be nil).
    def homographs
      @homographs
    end

    # Delegates to the raw record's #inspect.
    def inspect
      @synset_hash.inspect
    end

    # Relations of this synset, optionally restricted to one type.
    #
    # +type+ may be a relation symbol or its name (:hypernym, "hypernym") or a
    # raw pointer symbol ("@"). Any other value, including the default :all,
    # returns every relation. The Relation objects are built once and memoised.
    def relations(type = :all)
      @relations ||= @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) }

      case
      when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
        @relations.select { |relation| relation.relation_type == type.to_sym }
      when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
        @relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
      else
        @relations
      end
    end

    # An Evocations object for this synset, or nil when the connection returns
    # nil for it. The connection is queried on every call.
    def evocations
      evocations_arr = @wordnet_connection.evocations(synset_id)
      Evocations.new(evocations_arr, self, @wordnet_connection) unless evocations_arr.nil?
    end

    # Human readable description (memoised).
    def to_s
      @to_s ||= "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}"
    end

    alias word lemma

    private

    # Plucks +value+ (:word or :lexical_id) out of every words_with_lexical_ids entry.
    def map_from_words_with_lexical_ids(value)
      words_with_lexical_ids.map { |word_with_num| word_with_num[value] }
    end

  end

end
|