rwordnet 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4bea9b677b6a581d27c04ad1912c3034bf8329d8
4
- data.tar.gz: 4fdef6fdfdbe2373b445f7857d3fe5ff5071fba2
3
+ metadata.gz: af20be262ff83829299dfcaab7bdaf6daca77d9e
4
+ data.tar.gz: 5fdf6de52538acc9e2e6c6cd59af7f8055aa1361
5
5
  SHA512:
6
- metadata.gz: c1471523dcb27e496eb72b406f37ddd83184dfcecbcabe95c492f6018b29fa53180d06035b7f9c0a938bfde0e4c28a37f89473c0b3a712be527ed2abf6261af3
7
- data.tar.gz: 25cf40f306f3dfcf8076d9430cfd5d8d805af9198762a530ed4f293e5b0c4e5ccc680e941b2256f63c5141d5499ad77b7821eec5b4e245a72a47e1f9ca6e83b3
6
+ metadata.gz: 9f232d93029c8f200e6ba54af4461df6a4430e7ecc1189510cee59447303a350bb47f6deab13ad9b3cb7d6730b52d514b73377f673f1d81c03b4997d2a6ababe
7
+ data.tar.gz: d374907bacd015be0274bb8f8219d1803bc884b9c852f99c83441ca303e7fe2ac4687be7341462e6bb5fb54657f09c91d4cf64edc746ee27c8577ce2286a2942
data/README.markdown CHANGED
@@ -1,6 +1,9 @@
1
1
  # A pure Ruby interface to WordNet #
2
2
 
3
3
  [![Build Status](https://travis-ci.org/doches/rwordnet.png)](https://travis-ci.org/doches/rwordnet)
4
+ [![Documentation Status](https://inch-ci.org/github/doches/rwordnet.svg?branch=master)](https://inch-ci.org/github/doches/rwordnet)
5
+ [![Code Climate](https://codeclimate.com/github/doches/rwordnet/badges/gpa.svg)](https://codeclimate.com/github/doches/rwordnet)
6
+ [![Test Coverage](https://codeclimate.com/github/doches/rwordnet/badges/coverage.svg)](https://codeclimate.com/github/doches/rwordnet/coverage)
4
7
 
5
8
  ## Summary ##
6
9
 
@@ -1,9 +1,9 @@
1
1
  require 'wordnet'
2
2
 
3
- # Find the word 'fruit'
4
- lemma = WordNet::Lemma.find("fruit", :noun)
5
- # Find all the synsets for 'fruit', and pick the first one.
3
+ # Find the word 'dog'
4
+ lemma = WordNet::Lemma.find("dog", :noun)
5
+ # Find all the synsets for 'dog', and pick the first one.
6
6
  synset = lemma.synsets[0]
7
7
  puts synset
8
- # Print the full hypernym derivation for the first sense of 'fruit'.
9
- synset.expanded_hypernym.each { |d| puts d }
8
+ # Print the full hypernym derivation for the first sense of 'dog'.
9
+ synset.expanded_hypernyms.each { |d| puts d }
@@ -0,0 +1,20 @@
1
+ require 'wordnet'
2
+
3
+ puts 'dogs'
4
+ puts '--------------'
5
+ puts 'as noun'
6
+ p WordNet::Synset.morphy('dogs', 'noun')
7
+ puts 'as verb'
8
+ p WordNet::Synset.morphy('dogs', 'verb')
9
+
10
+
11
+ puts ''
12
+ puts 'hiking'
13
+ puts '--------------'
14
+ puts 'as noun'
15
+ p WordNet::Synset.morphy('hiking', 'noun')
16
+ puts 'as verb'
17
+ p WordNet::Synset.morphy('hiking', 'verb')
18
+ puts 'as all'
19
+ p WordNet::Synset.morphy_all('hiking')
20
+
@@ -0,0 +1,8 @@
1
+ require 'wordnet'
2
+
3
+ puts 'hiking'
4
+ WordNet::Synset.find_all('hiking').each{|d| puts d}
5
+
6
+ puts''
7
+ puts 'dogs'
8
+ WordNet::Synset.find_all('dogs').each{|d| puts d}
data/lib/wordnet/db.rb CHANGED
@@ -1,14 +1,25 @@
1
+ require 'stringio'
2
+
1
3
  module WordNet
2
4
  # Represents the WordNet database, and provides some basic interaction.
3
5
  class DB
4
6
  # By default, use the bundled WordNet
5
7
  @path = File.expand_path("../../../WordNet-3.0/", __FILE__)
6
8
 
9
+ class << self; attr_accessor :cached end
10
+ @raw_wordnet = {}
11
+
12
+
7
13
  class << self
8
14
  # To use your own WordNet installation (rather than the one bundled with rwordnet), assign its root directory to this attribute.
9
15
  # Returns the path to the WordNet installation currently in use. Defaults to the bundled version of WordNet.
10
16
  attr_accessor :path
11
17
 
18
+ # Open a wordnet database. You shouldn't have to call this directly; it's
19
+ # handled by the autocaching implemented in lemma.rb.
20
+ #
21
+ # `path` should be a string containing the path of a file relative to the
22
+ # root of the WordNet installation (it is joined onto `DB.path`).
12
23
  def open(path, &block)
13
24
  File.open(File.join(self.path, path), "r", &block)
14
25
  end
data/lib/wordnet/lemma.rb CHANGED
@@ -2,9 +2,27 @@ module WordNet
2
2
  # Represents a single word in the WordNet lexicon, which can be used to look up a set of synsets.
3
3
  class Lemma
4
4
  SPACE = ' '
5
- attr_accessor :word, :pos, :pointer_symbols, :tagsense_count, :synset_offsets, :id
6
5
 
7
- # Create a lemma from a line in an lexicon file. You should be creating Lemmas by hand; instead,
6
+ # The word this lemma represents
7
+ attr_accessor :word
8
+
9
+ # The part of speech (noun, verb, adjective) of this lemma. One of 'n', 'v', 'a' (adjective), or 'r' (adverb)
10
+ attr_accessor :pos
11
+
12
+ # The number of times the sense is tagged in various semantic concordance texts. A tagsense_count of 0 indicates that the sense has not been semantically tagged.
13
+ attr_accessor :tagsense_count
14
+
15
+ # The offsets, in bytes, at which the synsets contained in this lemma are stored in WordNet's internal database.
16
+ attr_accessor :synset_offsets
17
+
18
+ # A unique integer id that references this lemma. Used internally within WordNet's database.
19
+ attr_accessor :id
20
+
21
+ # An array of valid pointer symbols for this lemma. The list of all valid
22
+ # pointer symbols is defined in pointers.rb.
23
+ attr_accessor :pointer_symbols
24
+
25
+ # Create a lemma from a line in a lexicon file. You should not be creating Lemmas by hand; instead,
8
26
  # use the WordNet::Lemma.find and WordNet::Lemma.find_all methods to find the Lemma for a word.
9
27
  def initialize(lexicon_line, id)
10
28
  @id = id
@@ -24,6 +42,8 @@ module WordNet
24
42
  @synset_offsets.map { |offset| Synset.new(@pos, offset) }
25
43
  end
26
44
 
45
+ # Returns a compact string representation of this lemma, e.g. "fall,v" for
46
+ # the verb form of the word "fall".
27
47
  def to_s
28
48
  [@word, @pos].join(",")
29
49
  end
@@ -31,6 +51,7 @@ module WordNet
31
51
  class << self
32
52
  @@cache = {}
33
53
 
54
+ # Find all lemmas for this word across all known parts of speech
34
55
  def find_all(word)
35
56
  [:noun, :verb, :adj, :adv].flat_map do |pos|
36
57
  find(word, pos) || []
@@ -1,7 +1,25 @@
1
1
  module WordNet
2
+ # Pointers represent the relations between the words in one synset and another.
2
3
  class Pointer
3
- attr_reader :symbol, :offset, :pos, :source, :target
4
+ # The symbol that defines the relationship this pointer represents, e.g. "!" for verb antonym. Valid
5
+ # pointer symbols are defined in pointers.rb
6
+ attr_reader :symbol
4
7
 
8
+ # The offset, in bytes, of this pointer in WordNet's internal database.
9
+ attr_reader :offset
10
+
11
+ # The part of speech this pointer represents. One of 'n', 'v', 'a' (adjective), or 'r' (adverb).
12
+ attr_reader :pos
13
+
14
+ # The synset from which this pointer...points.
15
+ attr_reader :source
16
+
17
+ # The synset to which this pointer...points.
18
+ attr_reader :target
19
+
20
+ # Create a pointer. Pointers represent the relations between the words in one synset and another,
21
+ # and are referenced by a shorthand symbol (e.g. '!' for verb antonym). The list
22
+ # of valid pointer symbols is defined in pointers.rb
5
23
  def initialize(symbol: raise, offset: raise, pos: raise, source: raise)
6
24
  @symbol, @offset, @pos, @source = symbol, offset, pos, source
7
25
  @target = source.slice!(2,2)
@@ -4,7 +4,47 @@ module WordNet
4
4
  # Represents a synset (or group of synonymous words) in WordNet. Synsets are related to each other by various (and numerous!)
5
5
  # relationships, including Hypernym (x is a hypernym of y <=> x is a parent of y) and Hyponym (x is a child of y)
6
6
  class Synset
7
- attr_reader :gloss, :synset_offset, :lex_filenum, :synset_type, :word_counts, :pos_offset, :pos
7
+ @morphy_path = File.expand_path("../../../morphy/", __FILE__)
8
+ @exception_map = {}
9
+ @morphological_substitutions = {
10
+ 'noun' => [['s', ''], ['ses', 's'], ['ves', 'f'], ['xes', 'x'],
11
+ ['zes', 'z'], ['ches', 'ch'], ['shes', 'sh'],
12
+ ['men', 'man'], ['ies', 'y']],
13
+ 'verb' => [['s', ''], ['ies', 'y'], ['es', 'e'], ['es', ''],
14
+ ['ed', 'e'], ['ed', ''], ['ing', 'e'], ['ing', '']],
15
+ 'adj' => [['er', ''], ['est', ''], ['er', 'e'], ['est', 'e']],
16
+ 'adv' => []}
17
+
18
+ # Get the offset, in bytes, at which this synset's information is stored in WordNet's internal DB.
19
+ # You almost certainly don't care about this.
20
+ attr_reader :synset_offset
21
+
22
+ # A two digit decimal integer representing the name of the lexicographer file containing the synset for the sense.
23
+ # Probably only of interest if you're using a wordnet database marked up with custom attributes, and you
24
+ # want to ensure that you're using your own additions.
25
+ attr_reader :lex_filenum
26
+
27
+ # Get the list of words (and their frequencies within the WordNet graph) contained
28
+ # in this Synset.
29
+ attr_reader :word_counts
30
+
31
+ # Get the part of speech type of this synset. One of 'n' (noun), 'v' (verb), 'a' (adjective), or 'r' (adverb)
32
+ attr_reader :synset_type
33
+
34
+ # Get the offset, in bytes, at which this synset's POS information is stored in WordNet's internal DB.
35
+ # You almost certainly don't care about this.
36
+ attr_reader :pos_offset
37
+
38
+ # Get a shorthand representation of the part of speech this synset represents, e.g. "v" for verbs.
39
+ attr_reader :pos
40
+
41
+ # Get a string representation of this synset's gloss. "Gloss" is a human-readable
42
+ # description of this concept, often with example usage, e.g:
43
+ #
44
+ # move upward; "The fog lifted"; "The smoke arose from the forest fire"; "The mist uprose from the meadows"
45
+ #
46
+ # for an antonym of the second sense of the verb "fall"
47
+ attr_reader :gloss
8
48
 
9
49
  # Create a new synset by reading from the data file specified by +pos+, at +offset+ bytes into the file. This is how
10
50
  # the WordNet database is organized. You shouldn't be creating Synsets directly; instead, use Lemma#synsets.
@@ -40,6 +80,93 @@ module WordNet
40
80
  end
41
81
  end
42
82
 
83
+ # Ported from python NLTK
84
+ # Load all synsets with a given lemma and part of speech tag.
85
+ # `pos` is required: only synsets for that part of speech are loaded.
86
+ # To load synsets for all parts of speech, use Synset.find_all.
87
+ # Unlike the NLTK original, this port does not take a `lang` argument,
88
+ # so lemma names are not looked up per language.
89
+ def self.find(word, pos)
90
+ word = word.downcase
91
+ lemmas = self.morphy(word, pos).map{|form| WordNet::Lemma.find(form, pos)}
92
+ lemmas.map{|lemma| lemma.synsets}.flatten
93
+ end
94
+
95
+ def self.find_all(word)
96
+ SYNSET_TYPES.values.map{|pos| self.find(word, pos)}.flatten
97
+ end
98
+
99
+ def self.load_exception_map
100
+ SYNSET_TYPES.each do |_, pos|
101
+ @exception_map[pos] = {}
102
+ File.open(File.join(@morphy_path, 'exceptions', "#{pos}.exc"), 'r').each_line do |line|
103
+ line = line.split
104
+ @exception_map[pos][line[0]] = line[1..-1]
105
+ end
106
+ end
107
+ end
108
+
109
+ def self._apply_rules(forms, pos)
110
+ substitutions = @morphological_substitutions[pos]
111
+ out = []
112
+ forms.each do |form|
113
+ substitutions.each do |old, new|
114
+ if form.end_with? old
115
+ out.push form[0...-old.length] + new
116
+ end
117
+ end
118
+ end
119
+ return out
120
+ end
121
+
122
+ def self._filter_forms(forms, pos)
123
+ forms.reject{|form| Lemma.find(form, pos).nil?}.uniq
124
+ end
125
+
126
+ # ported from nltk python
127
+ # from jordanbg:
128
+ # Given an original string x
129
+ # 1. Apply rules once to the input to get y1, y2, y3, etc.
130
+ # 2. Return all that are in the database
131
+ # 3. If there are no matches, keep applying rules until you either
132
+ # find a match or you can't go any further
133
+ def self.morphy(form, pos)
134
+ if @exception_map == {}
135
+ self.load_exception_map
136
+ end
137
+ exceptions = @exception_map[pos]
138
+
139
+ # 0. Check the exception lists
140
+ if exceptions.has_key? form
141
+ return self._filter_forms([form] + exceptions[form], pos)
142
+ end
143
+
144
+ # 1. Apply rules once to the input to get y1, y2, y3, etc.
145
+ forms = self._apply_rules([form], pos)
146
+
147
+ # 2. Return all that are in the database (and check the original too)
148
+ results = self._filter_forms([form] + forms, pos)
149
+ if results != []
150
+ return results
151
+ end
152
+
153
+ # 3. If there are no matches, keep applying rules until we find a match
154
+ while forms.length > 0
155
+ forms = self._apply_rules(forms, pos)
156
+ results = self._filter_forms(forms, pos)
157
+ if results != []
158
+ return results
159
+ end
160
+ end
161
+
162
+ # Return an empty list if we can't find anything
163
+ return []
164
+ end
165
+
166
+ def self.morphy_all(form)
167
+ SYNSET_TYPES.values.map{|pos| self.morphy(form, pos)}.flatten
168
+ end
169
+
43
170
  # How many words does this Synset include?
44
171
  def word_count
45
172
  @word_counts.size
@@ -50,14 +177,19 @@ module WordNet
50
177
  @word_counts.keys
51
178
  end
52
179
 
53
- # List of valid +pointer_symbol+s is in pointers.rb
180
+ # Get an array of Synsets with the relation `pointer_symbol` relative to this
181
+ # Synset. Mostly, this is an internal method used by convenience methods (e.g. Synset#antonyms), but
182
+ # it can take any valid +pointer_symbol+ defined in pointers.rb.
183
+ #
184
+ # Example (get the gloss of an antonym for 'fall'):
185
+ # WordNet::Lemma.find("fall", :verb).synsets[1].relation("!")[0].gloss
54
186
  def relation(pointer_symbol)
55
187
  @pointers.select { |pointer| pointer.symbol == pointer_symbol }.
56
188
  map! { |pointer| Synset.new(@synset_type, pointer.offset) }
57
189
  end
58
190
 
59
191
  # Get the Synsets of this sense's antonyms
60
- def antonym
192
+ def antonyms
61
193
  relation(ANTONYM)
62
194
  end
63
195
 
@@ -66,13 +198,18 @@ module WordNet
66
198
  relation(HYPERNYM)[0]
67
199
  end
68
200
 
201
+ # Get the parent synsets (higher-level categories, e.g. fruit -> reproductive_structure).
202
+ def hypernyms
203
+ relation(HYPERNYM)
204
+ end
205
+
69
206
  # Get the child synset(s) (i.e., lower-level categories, e.g. fruit -> edible_fruit)
70
- def hyponym
207
+ def hyponyms
71
208
  relation(HYPONYM)
72
209
  end
73
210
 
74
211
  # Get the chain of first hypernyms (following only the first hypernym at each level, from this synset all the way up to +entity+) as an array.
75
- def expanded_hypernym
212
+ def expanded_first_hypernyms
76
213
  parent = hypernym
77
214
  list = []
78
215
  return list unless parent
@@ -80,19 +217,61 @@ module WordNet
80
217
  while parent
81
218
  break if list.include? parent.pos_offset
82
219
  list.push parent.pos_offset
83
- parent = parent.parent
220
+ parent = parent.hypernym
221
+ end
222
+
223
+ list.flatten!
224
+ list.map! { |offset| Synset.new(@pos, offset)}
225
+ end
226
+
227
+ # Get the entire hypernym tree (every hypernym of this synset and of its ancestors, all the way up to +entity+) as a flat array.
228
+ def expanded_hypernyms
229
+ parents = hypernyms
230
+ list = []
231
+ return list unless parents
232
+
233
+ while parents.length > 0
234
+ parent = parents.pop
235
+ next if list.include? parent.pos_offset
236
+ list.push parent.pos_offset
237
+ parents.push *parent.hypernyms
84
238
  end
85
239
 
86
240
  list.flatten!
87
241
  list.map! { |offset| Synset.new(@pos, offset)}
88
242
  end
89
243
 
244
+ def expanded_hypernyms_depth
245
+ parents = hypernyms.map{|hypernym| [hypernym, 1]}
246
+ list = []
247
+ out = []
248
+ return list unless parents
249
+
250
+ max_depth = 1
251
+ while parents.length > 0
252
+ parent, depth = parents.pop
253
+ next if list.include? parent.pos_offset
254
+ list.push parent.pos_offset
255
+ out.push [Synset.new(@pos, parent.pos_offset), depth]
256
+ parents.push *(parent.hypernyms.map{|hypernym| [hypernym, depth + 1]})
257
+ max_depth = [max_depth, depth].max
258
+ end
259
+ return [out, max_depth]
260
+ end
261
+
262
+ # Returns a compact, human-readable form of this synset, e.g.
263
+ #
264
+ # (v) fall (descend in free fall under the influence of gravity; "The branch fell from the tree"; "The unfortunate hiker fell into a crevasse")
265
+ #
266
+ # for the second meaning of the verb "fall."
90
267
  def to_s
91
268
  "(#{@synset_type}) #{words.map { |x| x.tr('_',' ') }.join(', ')} (#{@gloss})"
92
269
  end
93
270
 
271
+ alias to_str to_s
94
272
  alias size word_count
95
273
  alias parent hypernym
96
- alias children hyponym
274
+ alias parents hypernyms
275
+ alias children hyponyms
97
276
  end
98
277
  end
@@ -1,3 +1,3 @@
1
1
  module WordNet
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end