words-wordnet 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
+ # coding: utf-8
2
+
3
+ # local includes
4
+ require File.join(File.dirname(__FILE__), 'synset.rb')
5
+
6
module Words

  # Read-only view over the evocation data attached to one source synset.
  #
  # The evocation construct is accessed like a hash with the string keys
  # "relations", "means" and "medians"; each value is a '|'-delimited string.
  # The three lists are read in parallel: the entry at position i of each
  # list describes the same evocation.
  class Evocations

    # evocation_construct - hash-like object with "relations", "means" and
    #                       "medians" entries (pipe-delimited strings).
    # source_synset       - the synset these evocations belong to; must respond
    #                       to #homographs (and #to_s, for #to_s below).
    # wordnet_connection  - handed through unchanged to every Synset built here.
    def initialize(evocation_construct, source_synset, wordnet_connection)
      @evocation_construct = evocation_construct
      @source = source_synset
      @wordnet_connection = wordnet_connection
    end

    # Returns the "means" entries as an array of strings (memoized).
    def means
      @means ||= @evocation_construct["means"].split('|')
    end

    # Returns the "medians" entries as an array of strings (memoized).
    def medians
      @medians ||= @evocation_construct["medians"].split('|')
    end

    # Number of evocations, taken from the length of the means list.
    def size
      means.size
    end

    # First evocation, or nil when there are none.
    def first
      self[0]
    end

    # Last evocation, or nil when there are none.
    def last
      self[size - 1]
    end

    # Returns the evocation at +index+ as a hash with the keys :destination
    # (a Synset), :mean and :median.
    #
    # Returns nil when there is no destination id at +index+. Previously a
    # Synset was built from a nil id in that case, which made #first and #last
    # on an empty evocation set hand nil to the WordNet connection.
    def [](index)
      destination_id = destination_ids[index]
      return nil if destination_id.nil?

      {
        :destination => Synset.new(destination_id, @wordnet_connection, @source.homographs),
        :mean => means[index],
        :median => medians[index]
      }
    end

    # Builds a Synset for every destination id selected by +pos+
    # (see #destination_ids for the accepted values).
    def destinations(pos = :all)
      destination_ids(pos).map { |synset_id| Synset.new(synset_id, @wordnet_connection, @source.homographs) }
    end

    # Returns the destination synset ids, optionally filtered by part of speech.
    #
    # pos - a key of Homographs::SYMBOL_TO_POS (e.g. :noun), a key of
    #       Homographs::POS_TO_SYMBOL (e.g. "n"), or anything else (such as the
    #       default :all) to get every id. Must respond to #to_sym and #to_s.
    #
    # Filtering compares the first character of each id with the POS letter.
    def destination_ids(pos = :all)
      @destination_ids ||= @evocation_construct["relations"].split('|')

      case
      when Homographs::SYMBOL_TO_POS.include?(pos.to_sym)
        pos_letter = Homographs::SYMBOL_TO_POS[pos.to_sym]
        @destination_ids.select { |synset_id| synset_id[0, 1] == pos_letter }
      when Homographs::POS_TO_SYMBOL.include?(pos.to_s)
        @destination_ids.select { |synset_id| synset_id[0, 1] == pos.to_s }
      else
        @destination_ids
      end
    end

    # Human-readable summary: the evocation count and the source synset.
    def to_s
      "#{size} evocations from the #{@source}"
    end

  end

end
@@ -0,0 +1,100 @@
1
+ # coding: utf-8
2
+
3
+ # local includes
4
+ require File.join(File.dirname(__FILE__), 'synset.rb')
5
+
6
module Words

  # Wraps the raw record for one lemma and exposes the synsets that share it,
  # optionally filtered by part of speech.
  #
  # The raw record is accessed like a hash with the string keys "lemma",
  # "synset_ids" and "tagsense_counts". The latter two are '|'-delimited
  # strings whose entries start with a one-character part-of-speech letter.
  class Homographs

    POS_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb}
    SYMBOL_TO_POS = POS_TO_SYMBOL.invert

    # Convenience methods for part-of-speech access, e.g. for :noun:
    #   nouns?     -> true when at least one noun synset exists
    #   nouns      -> the noun synsets
    #   noun_count -> number of noun synsets
    #   noun_ids   -> ids of the noun synsets
    #
    # They are defined once here, when the class is defined. Previously they
    # were re-defined on the class every time an instance was created.
    SYMBOL_TO_POS.keys.each do |pos|
      define_method("#{pos}s?") { size(pos) > 0 }
      define_method("#{pos}s") { synsets(pos) }
      define_method("#{pos}_count") { size(pos) }
      define_method("#{pos}_ids") { synset_ids(pos) }
    end

    # raw_homographs     - hash-like record described on the class.
    # wordnet_connection - handed through unchanged to every Synset built here.
    def initialize(raw_homographs, wordnet_connection)
      @wordnet_connection = wordnet_connection
      @raw_homographs = raw_homographs
    end

    # Returns an array of single-entry hashes mapping a part-of-speech symbol
    # to an integer count. For each '|'-separated entry the first character is
    # the POS letter and the remainder is parsed with #to_i.
    def tagsense_counts
      @tagsense_counts ||= @raw_homographs["tagsense_counts"].split('|').map do |count|
        { POS_TO_SYMBOL[count[0, 1]] => count[1..-1].to_i }
      end
    end

    # The lemma with underscores replaced by spaces (memoized).
    def lemma
      @lemma ||= @raw_homographs["lemma"].gsub('_', ' ')
    end

    # Distinct part-of-speech symbols across all synset ids, in first-seen
    # order (memoized).
    def available_pos
      @available_pos ||= synset_ids.map { |synset_id| POS_TO_SYMBOL[synset_id[0, 1]] }.uniq
    end

    # "<lemma>, <pos>/<pos>/..." (memoized).
    def to_s
      @to_s ||= [lemma, " " + available_pos.join("/")].join(",")
    end

    # Number of synset ids selected by +pos+ (see #synset_ids).
    def size(pos = :all)
      synset_ids(pos).size
    end

    # Builds a Synset for every id selected by +pos+, passing this object as
    # the synset's homographs.
    def synsets(pos = :all)
      synset_ids(pos).map { |synset_id| Synset.new(synset_id, @wordnet_connection, self) }
    end

    # Returns the synset ids, optionally filtered by part of speech.
    #
    # pos - a key of SYMBOL_TO_POS (e.g. :noun), a key of POS_TO_SYMBOL
    #       (e.g. "n"), or anything else (such as the default :all) to get
    #       every id. Must respond to #to_sym and #to_s.
    #
    # Filtering compares the first character of each id with the POS letter.
    def synset_ids(pos = :all)
      @synset_ids ||= @raw_homographs["synset_ids"].split('|')

      case
      when SYMBOL_TO_POS.include?(pos.to_sym)
        pos_letter = SYMBOL_TO_POS[pos.to_sym]
        @synset_ids.select { |synset_id| synset_id[0, 1] == pos_letter }
      when POS_TO_SYMBOL.include?(pos.to_s)
        @synset_ids.select { |synset_id| synset_id[0, 1] == pos.to_s }
      else
        @synset_ids
      end
    end

    # Delegates to the raw record's #inspect.
    def inspect
      @raw_homographs.inspect
    end

    alias word lemma
    alias pos available_pos
    alias senses synsets
    alias sense_ids synset_ids

  end

end
@@ -0,0 +1,90 @@
1
+ # coding: utf-8
2
+
3
+ # local includes
4
+ require File.join(File.dirname(__FILE__), 'synset.rb')
5
+
6
module Words

  # One pointer from a source synset to a destination synset.
  #
  # A relation is built from a '.'-separated construct with four fields, in
  # order: pointer symbol, destination synset offset, destination part-of-speech
  # letter, and a four-character source/destination field. The destination
  # synset id is the part-of-speech letter followed by the offset.
  class Relation

    RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
      ";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
      "-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
      "%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
      "\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
    SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert

    # relation_construct - '.'-separated string described on the class.
    # source_synset      - the synset this relation starts from; must respond
    #                      to #words and #synset_id.
    # wordnet_connection - handed through unchanged to the destination Synset.
    def initialize(relation_construct, source_synset, wordnet_connection)
      @wordnet_connection = wordnet_connection
      pointer, offset, @pos, @source_dest = relation_construct.split('.')
      @dest_synset_id = @pos + offset
      @symbol = RELATION_TO_SYMBOL[pointer]
      @source_synset = source_synset
    end

    # True when the source/destination field is "0000", i.e. the relation is
    # between the synsets as a whole rather than between two specific words.
    def is_semantic?
      @source_dest == "0000"
    end

    # The word in the source synset this relation starts from, or nil for a
    # semantic relation. The first two characters of the source/destination
    # field are read as a 1-based hexadecimal index into the source's words.
    def source_word
      return nil if is_semantic?
      @source_word ||= @source_synset.words[@source_dest[0..1].to_i(16) - 1]
    end

    # The word in the destination synset this relation points at, or nil for a
    # semantic relation. The last two characters of the source/destination
    # field are read as a 1-based hexadecimal index into the destination's words.
    def destination_word
      return nil if is_semantic?
      @destination_word ||= destination.words[@source_dest[2..3].to_i(16) - 1]
    end

    # Tests whether this relation is of the given type.
    #
    # type - a relation name (e.g. :hypernym or "hypernym") or a pointer symbol
    #        string (e.g. "@"). Must respond to #to_sym and #to_s.
    #
    # Returns false for anything that is neither. The pointer-symbol branch
    # previously referenced an undefined +pos+ method and an undefined
    # POINTER_TO_SYMBOL constant, so every argument that was not a known
    # relation name raised NameError.
    def relation_type?(type)
      case
      when SYMBOL_TO_RELATION.include?(type.to_sym)
        type.to_sym == @symbol
      when RELATION_TO_SYMBOL.include?(type.to_s)
        RELATION_TO_SYMBOL[type.to_s] == @symbol
      else
        false
      end
    end

    # The relation name as a symbol (nil if the pointer symbol was not in
    # RELATION_TO_SYMBOL).
    def relation_type
      @symbol
    end

    # The destination Synset, built on first use with no homographs attached.
    def destination
      @destination ||= Synset.new(@dest_synset_id, @wordnet_connection, nil)
    end

    # Human-readable description (memoized).
    def to_s
      if is_semantic?
        @to_s ||= "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}"
      else
        @to_s ||= "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\""
      end
    end

    # Inspect output of a hash holding the parsed fields.
    def inspect
      { :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
    end

  end

end
@@ -0,0 +1,201 @@
1
+ # coding: utf-8
2
+
3
+ # local includes
4
+ require File.join(File.dirname(__FILE__), 'relation.rb')
5
+ require File.join(File.dirname(__FILE__), 'evocations.rb')
6
+
7
module Words

  # One synset, loaded from the WordNet connection by id.
  #
  # The connection's #synset(id) result is accessed like a hash with the string
  # keys "synset_id", "synset_type", "words", "lexical_filenum", "gloss" and
  # "relations". "words" and "relations" are '|'-delimited strings.
  class Synset

    SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
    SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
    # Indexed by the integer value of the synset's "lexical_filenum".
    NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
      { :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
      { :lex => :adv_all, :description => "all adverbs" },
      { :lex => :noun_Tops, :description => "unique beginner for nouns" },
      { :lex => :noun_act, :description => "nouns denoting acts or actions" },
      { :lex => :noun_animal, :description => "nouns denoting animals" },
      { :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
      { :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
      { :lex => :noun_body, :description => "nouns denoting body parts" },
      { :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
      { :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
      { :lex => :noun_event, :description => "nouns denoting natural events" },
      { :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
      { :lex => :noun_food, :description => "nouns denoting foods and drinks" },
      { :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
      { :lex => :noun_location, :description => "nouns denoting spatial position" },
      { :lex => :noun_motive, :description => "nouns denoting goals" },
      { :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
      { :lex => :noun_person, :description => "nouns denoting people" },
      { :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
      { :lex => :noun_plant, :description => "nouns denoting plants" },
      { :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
      { :lex => :noun_process, :description => "nouns denoting natural processes" },
      { :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
      { :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
      { :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
      { :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
      { :lex => :noun_substance, :description => "nouns denoting substances" },
      { :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
      { :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
      { :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
      { :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
      { :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
      { :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
      { :lex => :verb_consumption, :description => "verbs of eating and drinking" },
      { :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
      { :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
      { :lex => :verb_emotion, :description => "verbs of feeling" },
      { :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
      { :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
      { :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
      { :lex => :verb_social, :description => "verbs of political and social activities and events" },
      { :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
      { :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
      { :lex => :adj_ppl, :description => "participial adjectives" } ]

    # Defines, once per class, the convenience methods for relation type
    # access: for every relation name in Relation::SYMBOL_TO_RELATION (e.g.
    # :hypernym) a "<name>s?" predicate and a "<name>s" reader.
    #
    # This is triggered from #initialize rather than from the class body so
    # that Relation only has to be loaded by the time the first Synset is
    # created. Previously all of these methods were re-defined on every
    # instantiation.
    def self.define_relation_helpers
      return if @relation_helpers_defined

      Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
        define_method("#{relation_type}s?") { relations(relation_type).size > 0 }
        define_method("#{relation_type}s") { relations(relation_type) }
      end
      @relation_helpers_defined = true
    end
    private_class_method :define_relation_helpers

    # synset_id          - id passed to wordnet_connection.synset.
    # wordnet_connection - must respond to #synset(id) and #evocations(id).
    # homographs         - the Homographs this synset was reached through, or
    #                      nil (Relation#destination passes nil).
    def initialize(synset_id, wordnet_connection, homographs)
      @wordnet_connection = wordnet_connection
      @synset_hash = wordnet_connection.synset(synset_id)
      @homographs = homographs

      self.class.send(:define_relation_helpers)
    end

    # The synset type as a symbol, looked up in SYNSET_TYPE_TO_SYMBOL.
    def synset_type
      SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
    end

    # The words in this synset, underscores replaced by spaces (memoized).
    def words
      @words ||= map_from_words_with_lexical_ids(:word)
    end

    # The lexical id of each word, in the same order as #words (memoized).
    #
    # Uses its own cache: this used to share @words with #words, so whichever
    # of the two methods ran first determined what both returned.
    def lexical_ids
      @lexical_ids ||= map_from_words_with_lexical_ids(:lexical_id)
    end

    # Number of words in this synset.
    def size
      words.size
    end

    # Array of { :word => ..., :lexical_id => ... } hashes, one per
    # '|'-separated entry of "words". Each entry is split on '.'; the first
    # part is the word and the second the lexical id (memoized).
    def words_with_lexical_ids
      @words_with_num ||= @synset_hash["words"].split('|').map do |word|
        word_parts = word.split('.')
        { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] }
      end
    end

    # The raw "lexical_filenum" value.
    def lexical_filenum
      @synset_hash["lexical_filenum"]
    end

    # The :lex symbol of this synset's NUM_TO_LEX entry.
    def lexical_catagory
      lexical[:lex]
    end

    # The :description string of this synset's NUM_TO_LEX entry.
    def lexical_description
      lexical[:description]
    end

    # The NUM_TO_LEX entry selected by lexical_filenum.to_i.
    def lexical
      NUM_TO_LEX[lexical_filenum.to_i]
    end

    # The raw "synset_id" value.
    def synset_id
      @synset_hash["synset_id"]
    end

    # The raw "gloss" value.
    def gloss
      @synset_hash["gloss"]
    end

    # The lemma of the homographs this synset was reached through, or nil when
    # it was created without homographs (previously a NoMethodError on nil).
    def lemma
      @homographs && @homographs.lemma
    end

    # The homographs passed to the constructor (may be nil).
    def homographs
      @homographs
    end

    # Delegates to the raw synset record's #inspect.
    def inspect
      @synset_hash.inspect
    end

    # Returns this synset's relations, optionally filtered by type.
    #
    # type - a key of Relation::SYMBOL_TO_RELATION (e.g. :hypernym), a key of
    #        Relation::RELATION_TO_SYMBOL (e.g. "@"), or anything else (such as
    #        the default :all) to get every relation. Must respond to #to_sym
    #        and #to_s.
    #
    # The Relation objects are built once and memoized.
    def relations(type = :all)
      @relations ||= @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) }

      case
      when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
        @relations.select { |relation| relation.relation_type == type.to_sym }
      when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
        wanted = Relation::RELATION_TO_SYMBOL[type.to_s]
        @relations.select { |relation| relation.relation_type == wanted }
      else
        @relations
      end
    end

    # An Evocations object for this synset, or nil when the connection returns
    # nil for it. The connection is queried on every call.
    def evocations
      evocations_arr = @wordnet_connection.evocations(synset_id)
      Evocations.new(evocations_arr, self, @wordnet_connection) unless evocations_arr.nil?
    end

    # Human-readable description (memoized).
    def to_s
      @to_s ||= "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}"
    end

    alias word lemma
    # Correctly spelled alias; the original misspelled name is kept for callers.
    alias lexical_category lexical_catagory

    private

    # Plucks +value+ (:word or :lexical_id) from every words_with_lexical_ids entry.
    def map_from_words_with_lexical_ids(value)
      words_with_lexical_ids.map { |word_with_num| word_with_num[value] }
    end

  end

end