words 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +4 -2
- data/Rakefile +38 -41
- data/VERSION +1 -1
- data/bin/build_wordnet +44 -8
- data/examples.rb +4 -3
- data/lib/evocations.rb +3 -7
- data/lib/homographs.rb +2 -8
- data/lib/relation.rb +54 -55
- data/lib/synset.rb +134 -132
- data/lib/wordnet_connectors/pure_wordnet_connection.rb +130 -48
- data/lib/wordnet_connectors/tokyo_wordnet_connection.rb +73 -17
- data/lib/words.rb +108 -22
- data/spec/words_spec.rb +38 -0
- data/words.gemspec +6 -7
- metadata +5 -6
- data/lib/wordnet_connection.rb +0 -187
data/README.markdown
CHANGED
@@ -6,7 +6,9 @@ Words implements a fast interface to [Wordnet®](http://wordnet.princeton.edu) w
|
|
6
6
|
|
7
7
|
* Version 0.2 Introduced Pure Ruby Backend
|
8
8
|
* Version 0.3 Introduced Evocation Support (see examples & below) as developed by the [Wordnet® Evocation Project](http://wordnet.cs.princeton.edu/downloads/evocation/release-0.4/README.TXT)
|
9
|
-
* Version 0.4 Substantial performance increase in pure mode (now faster at some things than the tokyo backend) and simplification of use! Full refactoring. Move to RSpec for testing.
|
9
|
+
* Version 0.4 Substantial performance increase in pure mode (now faster at some things than the tokyo backend) and simplification of use! Full refactoring. Move to RSpec for testing. API CHANGES: Words::Words -> Words::Wordnet, close -> close!, connected -> connected? and evocations_enabled? -> evocations?
|
10
|
+
|
11
|
+
Documentation: [Yardoc Live](http://yardoc.org/docs/roja-words)
|
10
12
|
|
11
13
|
## Pre-Installation ##
|
12
14
|
|
@@ -91,7 +93,7 @@ Heres a few little examples of using words within your programs.
|
|
91
93
|
require 'rubygems'
|
92
94
|
require 'words'
|
93
95
|
|
94
|
-
data = Words::Wordnet.new # or: data = Words::
|
96
|
+
data = Words::Wordnet.new # or: data = Words::Wordnet.new(:tokyo) for the tokyo backend
|
95
97
|
|
96
98
|
# to specify a wordnet path Words::Words.new(:pure, '/path/to/wordnet')
|
97
99
|
# to specify the tokyo dataset Words::Words.new(:pure, :search, '/path/to/data.tct')
|
data/Rakefile
CHANGED
@@ -1,58 +1,55 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'rubygems'
|
2
3
|
require 'rake'
|
3
4
|
|
4
5
|
begin
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
Jeweler::RubyforgeTasks.new do |rubyforge|
|
20
|
-
rubyforge.doc_task = "rdoc"
|
21
|
-
end
|
6
|
+
require 'jeweler'
|
7
|
+
Jeweler::Tasks.new do |gem|
|
8
|
+
gem.name = "words"
|
9
|
+
gem.summary = %Q{A Fast & Easy to use interface to WordNet® with cross ruby distribution compatability.}
|
10
|
+
gem.description = %Q{Words, with both pure ruby & tokyo-cabinate backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatability and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a sintch!}
|
11
|
+
gem.email = "roja@arbia.co.uk"
|
12
|
+
gem.homepage = "http://github.com/roja/words"
|
13
|
+
gem.authors = ["Roja Buck"]
|
14
|
+
gem.executables = [ "build_wordnet" ]
|
15
|
+
gem.default_executable = "build_wordnet"
|
16
|
+
gem.rubyforge_project = 'words'
|
17
|
+
gem.add_development_dependency "rspec", ">= 1.2.9"
|
18
|
+
end
|
19
|
+
Jeweler::GemcutterTasks.new
|
22
20
|
rescue LoadError
|
23
|
-
|
21
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
24
22
|
end
|
25
23
|
|
26
|
-
require 'rake/
|
27
|
-
Rake::
|
28
|
-
|
29
|
-
|
30
|
-
test.verbose = true
|
24
|
+
require 'spec/rake/spectask'
|
25
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
26
|
+
spec.libs << 'lib' << 'spec'
|
27
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
31
28
|
end
|
32
29
|
|
33
30
|
begin
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
31
|
+
require 'rcov'
|
32
|
+
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
33
|
+
spec.libs << 'lib' << 'spec'
|
34
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
35
|
+
spec.rcov = true
|
36
|
+
end
|
40
37
|
rescue LoadError
|
41
|
-
|
42
|
-
|
43
|
-
|
38
|
+
task :rcov do
|
39
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install rcov"
|
40
|
+
end
|
44
41
|
end
|
45
42
|
|
46
|
-
task :
|
43
|
+
task :spec => :check_dependencies
|
47
44
|
|
48
|
-
task :default => :
|
45
|
+
task :default => :spec
|
49
46
|
|
50
47
|
require 'rake/rdoctask'
|
51
48
|
Rake::RDocTask.new do |rdoc|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
end
|
49
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
50
|
+
|
51
|
+
rdoc.rdoc_dir = 'rdoc'
|
52
|
+
rdoc.title = "test #{version}"
|
53
|
+
rdoc.rdoc_files.include('README*')
|
54
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
55
|
+
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.0
|
1
|
+
0.4.1
|
data/bin/build_wordnet
CHANGED
@@ -7,9 +7,11 @@ require 'pathname'
|
|
7
7
|
require 'rubygems'
|
8
8
|
|
9
9
|
# standard library includes
|
10
|
-
require 'trollop'
|
10
|
+
#require 'trollop'
|
11
11
|
require 'zlib'
|
12
12
|
require 'net/http'
|
13
|
+
require 'optparse'
|
14
|
+
require 'pp'
|
13
15
|
|
14
16
|
# local includes
|
15
17
|
require File.join(File.dirname(__FILE__), '..', 'lib', 'words.rb')
|
@@ -19,16 +21,50 @@ POS_FILE_TYPE_TO_SHORT = { 'adj' => 'a', 'adv' => 'r', 'noun' => 'n', 'verb' =>
|
|
19
21
|
|
20
22
|
puts "Words Dataset Constructor 2010 (c) Roja Buck"
|
21
23
|
|
22
|
-
opts =
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
opts = { :quiet => false, :build_tokyo => false, :build_tokyo_with_evocations => false, :build_pure_evocations => false, :wordnet => 'Search...' }
|
25
|
+
|
26
|
+
optparse = OptionParser.new do|option|
|
27
|
+
|
28
|
+
option.on( '-q', '--quiet', "Don't output verbose program detail. (Default: false)" ) do
|
29
|
+
opts[:quiet] = true
|
30
|
+
end
|
31
|
+
|
32
|
+
option.on( '-w', '--wordnet FILE', "Location of the wordnet dictionary directory. (Default: Search)" ) do|f|
|
33
|
+
opts[:wordnet] = f
|
34
|
+
end
|
35
|
+
|
36
|
+
option.on( '-t', '--build-tokyo', "Build the tokyo wordnet dataset? (Default: false)" ) do
|
37
|
+
opts[:build_tokyo] = true
|
38
|
+
end
|
39
|
+
|
40
|
+
option.on( '-x', '--build-tokyo-with-evocations', "Build the tokyo dataset with the similarity dataset based on the wordnet evocation project? (Default: false) NOTE: requires internet connection." ) do
|
41
|
+
opts[:build_tokyo_with_evocations] = true
|
42
|
+
end
|
43
|
+
|
44
|
+
option.on( '-e', '--build-pure-evocations', "Build the similarity dataset based on the wordnet evocation project for use with the pure words mode. (Default: false) NOTE: requires internet connection." ) do
|
45
|
+
opts[:build_pure_evocations] = true
|
46
|
+
end
|
47
|
+
|
48
|
+
option.on( '-h', '--help', 'Display this screen' ) do
|
49
|
+
puts option
|
50
|
+
exit
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
optparse.parse!
|
56
|
+
|
57
|
+
if !opts[:build_tokyo] && !opts[:build_tokyo_with_evocations] && !opts[:build_pure_evocations]
|
58
|
+
puts "ERROR: You need to specify at least one dataset you want to build."
|
59
|
+
exit
|
28
60
|
end
|
29
|
-
Trollop::die :build_tokyo, "You need to specify which dataset you want to build." if !opts[:build_tokyo] && !opts[:build_tokyo_with_evocations] && !opts[:build_pure_evocations]
|
30
61
|
puts "Verbose mode enabled" if (VERBOSE = !opts[:quiet])
|
31
62
|
|
63
|
+
pp "Options:", opts
|
64
|
+
|
65
|
+
exit
|
66
|
+
|
67
|
+
|
32
68
|
require 'rufus-tokyo' if opts[:build_tokyo] || opts[:build_tokyo_with_evocations]
|
33
69
|
|
34
70
|
gem_path = Pathname.new "#{File.dirname(__FILE__)}/.."
|
data/examples.rb
CHANGED
data/lib/evocations.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
# local includes
|
2
4
|
require File.join(File.dirname(__FILE__), 'synset.rb')
|
3
5
|
|
@@ -7,9 +9,7 @@ module Words
|
|
7
9
|
|
8
10
|
def initialize(evocation_construct, source_synset, wordnet_connection)
|
9
11
|
|
10
|
-
@wordnet_connection = wordnet_connection
|
11
|
-
@source = source_synset
|
12
|
-
@evocation_construct = evocation_construct
|
12
|
+
@evocation_construct, @source, @wordnet_connection = evocation_construct, source_synset, wordnet_connection
|
13
13
|
|
14
14
|
end
|
15
15
|
|
@@ -17,16 +17,12 @@ module Words
|
|
17
17
|
|
18
18
|
@means ||= @evocation_construct["means"].split('|')
|
19
19
|
|
20
|
-
@means
|
21
|
-
|
22
20
|
end
|
23
21
|
|
24
22
|
def medians
|
25
23
|
|
26
24
|
@medians ||= @evocation_construct["medians"].split('|')
|
27
25
|
|
28
|
-
@medians
|
29
|
-
|
30
26
|
end
|
31
27
|
|
32
28
|
def size
|
data/lib/homographs.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
# local includes
|
2
4
|
require File.join(File.dirname(__FILE__), 'synset.rb')
|
3
5
|
|
@@ -35,32 +37,24 @@ module Words
|
|
35
37
|
|
36
38
|
@tagsense_counts ||= @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } }
|
37
39
|
|
38
|
-
@tagsense_counts
|
39
|
-
|
40
40
|
end
|
41
41
|
|
42
42
|
def lemma
|
43
43
|
|
44
44
|
@lemma ||= @raw_homographs["lemma"].gsub('_', ' ')
|
45
45
|
|
46
|
-
@lemma
|
47
|
-
|
48
46
|
end
|
49
47
|
|
50
48
|
def available_pos
|
51
49
|
|
52
50
|
@available_pos ||= synset_ids.map { |synset_id| POS_TO_SYMBOL[synset_id[0,1]] }.uniq
|
53
51
|
|
54
|
-
@available_pos
|
55
|
-
|
56
52
|
end
|
57
53
|
|
58
54
|
def to_s
|
59
55
|
|
60
56
|
@to_s ||= [lemma, " " + available_pos.join("/")].join(",")
|
61
57
|
|
62
|
-
@to_s
|
63
|
-
|
64
58
|
end
|
65
59
|
|
66
60
|
def size(pos = :all)
|
data/lib/relation.rb
CHANGED
@@ -1,91 +1,90 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
# local includes
|
2
4
|
require File.join(File.dirname(__FILE__), 'synset.rb')
|
3
5
|
|
4
6
|
module Words
|
5
7
|
|
6
|
-
|
8
|
+
class Relation
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
|
11
|
+
";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
|
12
|
+
"-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
|
13
|
+
"%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
|
14
|
+
"\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
|
15
|
+
SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert
|
14
16
|
|
15
|
-
|
17
|
+
def initialize(relation_construct, source_synset, wordnet_connection)
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
@wordnet_connection = wordnet_connection
|
20
|
+
@symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
|
21
|
+
@dest_synset_id = @pos + @dest_synset_id
|
22
|
+
@symbol = RELATION_TO_SYMBOL[@symbol]
|
23
|
+
@source_synset = source_synset
|
22
24
|
|
23
|
-
|
25
|
+
end
|
24
26
|
|
25
|
-
|
27
|
+
def is_semantic?
|
26
28
|
|
27
|
-
|
29
|
+
@source_dest == "0000"
|
28
30
|
|
29
|
-
|
31
|
+
end
|
30
32
|
|
31
|
-
|
33
|
+
def source_word
|
32
34
|
|
33
|
-
|
35
|
+
return nil if is_semantic?
|
36
|
+
@source_word ||= @source_synset.words[@source_dest[0..1].to_i(16)-1]
|
34
37
|
|
35
|
-
|
38
|
+
end
|
36
39
|
|
37
|
-
|
40
|
+
def destination_word
|
38
41
|
|
39
|
-
|
42
|
+
return nil if is_semantic?
|
43
|
+
@destination_word ||= destination.words[@source_dest[2..3].to_i(16)-1]
|
40
44
|
|
41
|
-
|
45
|
+
end
|
42
46
|
|
43
|
-
|
47
|
+
def relation_type?(type)
|
44
48
|
|
45
|
-
|
49
|
+
case
|
50
|
+
when SYMBOL_TO_RELATION.include?(type.to_sym)
|
51
|
+
type.to_sym == @symbol
|
52
|
+
when RELATION_TO_SYMBOL.include?(pos.to_s)
|
53
|
+
POINTER_TO_SYMBOL[type.to_sym] == @symbol
|
54
|
+
else
|
55
|
+
false
|
56
|
+
end
|
46
57
|
|
47
|
-
|
58
|
+
end
|
48
59
|
|
49
|
-
|
50
|
-
when SYMBOL_TO_RELATION.include?(type.to_sym)
|
51
|
-
type.to_sym == @symbol
|
52
|
-
when RELATION_TO_SYMBOL.include?(pos.to_s)
|
53
|
-
POINTER_TO_SYMBOL[type.to_sym] == @symbol
|
54
|
-
else
|
55
|
-
false
|
56
|
-
end
|
60
|
+
def relation_type
|
57
61
|
|
58
|
-
|
62
|
+
@symbol
|
59
63
|
|
60
|
-
|
64
|
+
end
|
61
65
|
|
62
|
-
|
66
|
+
def destination
|
63
67
|
|
64
|
-
|
68
|
+
@destination ||= Synset.new(@dest_synset_id, @wordnet_connection, nil)
|
65
69
|
|
66
|
-
|
70
|
+
end
|
67
71
|
|
68
|
-
|
72
|
+
def to_s
|
69
73
|
|
70
|
-
|
74
|
+
if is_semantic?
|
75
|
+
@to_s ||= "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}"
|
76
|
+
else
|
77
|
+
@to_s ||= "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\""
|
78
|
+
end
|
71
79
|
|
72
|
-
|
80
|
+
end
|
73
81
|
|
74
|
-
|
82
|
+
def inspect
|
75
83
|
|
76
|
-
|
77
|
-
@to_s = "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}" if is_semantic? && !defined?(@to_s)
|
78
|
-
|
79
|
-
@to_s
|
80
|
-
|
81
|
-
end
|
82
|
-
|
83
|
-
def inspect
|
84
|
-
|
85
|
-
{ :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
|
84
|
+
{ :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
|
86
85
|
|
87
|
-
|
86
|
+
end
|
88
87
|
|
89
|
-
|
88
|
+
end
|
90
89
|
|
91
90
|
end
|
data/lib/synset.rb
CHANGED
@@ -1,199 +1,201 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
# local includes
|
2
4
|
require File.join(File.dirname(__FILE__), 'relation.rb')
|
3
5
|
require File.join(File.dirname(__FILE__), 'evocations.rb')
|
4
6
|
|
5
7
|
module Words
|
6
8
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
def initialize(synset_id, wordnet_connection, homographs)
|
58
|
-
|
59
|
-
@wordnet_connection = wordnet_connection
|
60
|
-
@synset_hash = wordnet_connection.synset(synset_id)
|
61
|
-
@homographs = homographs
|
62
|
-
|
63
|
-
# construct some conveniance menthods for relation type access
|
64
|
-
Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
|
65
|
-
self.class.send(:define_method, "#{relation_type}s?") do
|
66
|
-
relations(relation_type).size > 0
|
67
|
-
end
|
68
|
-
self.class.send(:define_method, "#{relation_type}s") do
|
69
|
-
relations(relation_type)
|
70
|
-
end
|
71
|
-
end
|
9
|
+
class Synset
|
10
|
+
|
11
|
+
SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
|
12
|
+
SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
|
13
|
+
NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
|
14
|
+
{ :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
|
15
|
+
{ :lex => :adv_all, :description => "all adverbs" },
|
16
|
+
{ :lex => :noun_Tops, :description => "unique beginner for nouns" },
|
17
|
+
{ :lex => :noun_act, :description => "nouns denoting acts or actions" },
|
18
|
+
{ :lex => :noun_animal, :description => "nouns denoting animals" },
|
19
|
+
{ :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
|
20
|
+
{ :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
|
21
|
+
{ :lex => :noun_body, :description => "nouns denoting body parts" },
|
22
|
+
{ :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
|
23
|
+
{ :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
|
24
|
+
{ :lex => :noun_event, :description => "nouns denoting natural events" },
|
25
|
+
{ :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
|
26
|
+
{ :lex => :noun_food, :description => "nouns denoting foods and drinks" },
|
27
|
+
{ :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
|
28
|
+
{ :lex => :noun_location, :description => "nouns denoting spatial position" },
|
29
|
+
{ :lex => :noun_motive, :description => "nouns denoting goals" },
|
30
|
+
{ :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
|
31
|
+
{ :lex => :noun_person, :description => "nouns denoting people" },
|
32
|
+
{ :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
|
33
|
+
{ :lex => :noun_plant, :description => "nouns denoting plants" },
|
34
|
+
{ :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
|
35
|
+
{ :lex => :noun_process, :description => "nouns denoting natural processes" },
|
36
|
+
{ :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
|
37
|
+
{ :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
|
38
|
+
{ :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
|
39
|
+
{ :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
|
40
|
+
{ :lex => :noun_substance, :description => "nouns denoting substances" },
|
41
|
+
{ :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
|
42
|
+
{ :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
|
43
|
+
{ :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
|
44
|
+
{ :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
|
45
|
+
{ :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
|
46
|
+
{ :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
|
47
|
+
{ :lex => :verb_consumption, :description => "verbs of eating and drinking" },
|
48
|
+
{ :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
|
49
|
+
{ :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
|
50
|
+
{ :lex => :verb_emotion, :description => "verbs of feeling" },
|
51
|
+
{ :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
|
52
|
+
{ :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
|
53
|
+
{ :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
|
54
|
+
{ :lex => :verb_social, :description => "verbs of political and social activities and events" },
|
55
|
+
{ :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
|
56
|
+
{ :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
|
57
|
+
{ :lex => :adj_ppl, :description => "participial adjectives" } ]
|
72
58
|
|
73
|
-
|
59
|
+
def initialize(synset_id, wordnet_connection, homographs)
|
74
60
|
|
75
|
-
|
61
|
+
@wordnet_connection = wordnet_connection
|
62
|
+
@synset_hash = wordnet_connection.synset(synset_id)
|
63
|
+
@homographs = homographs
|
76
64
|
|
77
|
-
|
65
|
+
# construct some conveniance menthods for relation type access
|
66
|
+
Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
|
67
|
+
self.class.send(:define_method, "#{relation_type}s?") do
|
68
|
+
relations(relation_type).size > 0
|
69
|
+
end
|
70
|
+
self.class.send(:define_method, "#{relation_type}s") do
|
71
|
+
relations(relation_type)
|
72
|
+
end
|
73
|
+
end
|
78
74
|
|
79
|
-
|
75
|
+
end
|
80
76
|
|
81
|
-
|
77
|
+
def synset_type
|
82
78
|
|
83
|
-
|
79
|
+
SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
|
84
80
|
|
85
|
-
|
81
|
+
end
|
86
82
|
|
87
|
-
|
83
|
+
def words
|
88
84
|
|
89
|
-
|
85
|
+
@words ||= map_from_words_with_lexical_ids(:word)
|
90
86
|
|
91
|
-
|
87
|
+
end
|
92
88
|
|
93
|
-
|
89
|
+
def lexical_ids
|
94
90
|
|
95
|
-
|
91
|
+
@words ||= map_from_words_with_lexical_ids(:lexical_id)
|
96
92
|
|
97
|
-
|
93
|
+
end
|
98
94
|
|
99
|
-
|
95
|
+
def size
|
100
96
|
|
101
|
-
|
97
|
+
words.size
|
102
98
|
|
103
|
-
|
99
|
+
end
|
104
100
|
|
105
|
-
|
101
|
+
def words_with_lexical_ids
|
106
102
|
|
107
|
-
|
103
|
+
@words_with_num ||= @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } }
|
108
104
|
|
109
|
-
|
105
|
+
end
|
110
106
|
|
111
|
-
|
107
|
+
def lexical_filenum
|
112
108
|
|
113
|
-
|
109
|
+
@synset_hash["lexical_filenum"]
|
114
110
|
|
115
|
-
|
111
|
+
end
|
116
112
|
|
117
|
-
|
113
|
+
def lexical_catagory
|
118
114
|
|
119
|
-
|
115
|
+
lexical[:lex]
|
120
116
|
|
121
|
-
|
117
|
+
end
|
122
118
|
|
123
|
-
|
119
|
+
def lexical_description
|
124
120
|
|
125
|
-
|
121
|
+
lexical[:description]
|
126
122
|
|
127
|
-
|
123
|
+
end
|
128
124
|
|
129
|
-
|
125
|
+
def lexical
|
130
126
|
|
131
|
-
|
127
|
+
NUM_TO_LEX[lexical_filenum.to_i]
|
132
128
|
|
133
|
-
|
129
|
+
end
|
134
130
|
|
135
|
-
|
131
|
+
def synset_id
|
136
132
|
|
137
|
-
|
133
|
+
@synset_hash["synset_id"]
|
138
134
|
|
139
|
-
|
135
|
+
end
|
140
136
|
|
141
|
-
|
137
|
+
def gloss
|
142
138
|
|
143
|
-
|
139
|
+
@synset_hash["gloss"]
|
144
140
|
|
145
|
-
|
141
|
+
end
|
146
142
|
|
147
|
-
|
143
|
+
def lemma
|
148
144
|
|
149
|
-
|
145
|
+
@homographs.lemma
|
150
146
|
|
151
|
-
|
147
|
+
end
|
152
148
|
|
153
|
-
|
149
|
+
def homographs
|
154
150
|
|
155
|
-
|
151
|
+
@homographs
|
156
152
|
|
157
|
-
|
153
|
+
end
|
158
154
|
|
159
|
-
|
155
|
+
def inspect
|
160
156
|
|
161
|
-
|
157
|
+
@synset_hash.inspect
|
162
158
|
|
163
|
-
|
159
|
+
end
|
164
160
|
|
165
|
-
|
161
|
+
def relations(type = :all)
|
166
162
|
|
167
|
-
|
163
|
+
@relations ||= @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) }
|
168
164
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
165
|
+
case
|
166
|
+
when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
|
167
|
+
@relations.select { |relation| relation.relation_type == type.to_sym }
|
168
|
+
when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
|
169
|
+
@relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
|
170
|
+
else
|
171
|
+
@relations
|
172
|
+
end
|
177
173
|
|
178
|
-
|
174
|
+
end
|
179
175
|
|
180
|
-
|
176
|
+
def evocations
|
181
177
|
|
182
|
-
|
183
|
-
|
178
|
+
evocations_arr = @wordnet_connection.evocations(synset_id)
|
179
|
+
Evocations.new(evocations_arr, self, @wordnet_connection) unless evocations_arr.nil?
|
184
180
|
|
185
|
-
|
181
|
+
end
|
186
182
|
|
187
|
-
|
183
|
+
def to_s
|
188
184
|
|
189
|
-
|
185
|
+
@to_s ||= "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}"
|
190
186
|
|
191
|
-
|
187
|
+
end
|
192
188
|
|
193
|
-
|
189
|
+
alias word lemma
|
190
|
+
|
191
|
+
private
|
194
192
|
|
195
|
-
|
193
|
+
def map_from_words_with_lexical_ids(value)
|
196
194
|
|
197
|
-
|
195
|
+
words_with_lexical_ids.map { |word_with_num| word_with_num[value] }
|
196
|
+
|
197
|
+
end
|
198
|
+
|
199
|
+
end
|
198
200
|
|
199
201
|
end
|