words 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +4 -2
- data/Rakefile +38 -41
- data/VERSION +1 -1
- data/bin/build_wordnet +44 -8
- data/examples.rb +4 -3
- data/lib/evocations.rb +3 -7
- data/lib/homographs.rb +2 -8
- data/lib/relation.rb +54 -55
- data/lib/synset.rb +134 -132
- data/lib/wordnet_connectors/pure_wordnet_connection.rb +130 -48
- data/lib/wordnet_connectors/tokyo_wordnet_connection.rb +73 -17
- data/lib/words.rb +108 -22
- data/spec/words_spec.rb +38 -0
- data/words.gemspec +6 -7
- metadata +5 -6
- data/lib/wordnet_connection.rb +0 -187
data/README.markdown
CHANGED
@@ -6,7 +6,9 @@ Words implements a fast interface to [Wordnet®](http://wordnet.princeton.edu) w
|
|
6
6
|
|
7
7
|
* Version 0.2 Introduced Pure Ruby Backend
|
8
8
|
* Version 0.3 Introduced Evocation Support (see examples & below) as developed by the [Wordnet® Evocation Project](http://wordnet.cs.princeton.edu/downloads/evocation/release-0.4/README.TXT)
|
9
|
-
* Version 0.4 Substantial performance increase in pure mode (now faster at some things than the tokyo backend) and simplification of use! Full refactoring. Move to RSpec for testing.
|
9
|
+
* Version 0.4 Substantial performance increase in pure mode (now faster at some things than the tokyo backend) and simplification of use! Full refactoring. Move to RSpec for testing. API CHANGES: Words::Words -> Words::Wordnet, close -> close!, connected -> connected? and evocations_enabled? -> evocations?
|
10
|
+
|
11
|
+
Documentation: [Yardoc Live](http://yardoc.org/docs/roja-words)
|
10
12
|
|
11
13
|
## Pre-Installation ##
|
12
14
|
|
@@ -91,7 +93,7 @@ Heres a few little examples of using words within your programs.
|
|
91
93
|
require 'rubygems'
|
92
94
|
require 'words'
|
93
95
|
|
94
|
-
data = Words::Wordnet.new # or: data = Words::
|
96
|
+
data = Words::Wordnet.new # or: data = Words::Wordnet.new(:tokyo) for the tokyo backend
|
95
97
|
|
96
98
|
# to specify a wordnet path Words::Wordnet.new(:pure, '/path/to/wordnet')
|
97
99
|
# to specify the tokyo dataset Words::Wordnet.new(:tokyo, :search, '/path/to/data.tct')
|
data/Rakefile
CHANGED
@@ -1,58 +1,55 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
require 'rubygems'
|
2
3
|
require 'rake'
|
3
4
|
|
4
5
|
begin
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
Jeweler::RubyforgeTasks.new do |rubyforge|
|
20
|
-
rubyforge.doc_task = "rdoc"
|
21
|
-
end
|
6
|
+
require 'jeweler'
|
7
|
+
# Gem metadata handed to Jeweler. The summary and description are user-facing
# text, so spelling matters here.
Jeweler::Tasks.new do |gem|
  gem.name = "words"
  gem.summary = %Q{A Fast & Easy to use interface to WordNet® with cross ruby distribution compatibility.}
  gem.description = %Q{Words, with both pure ruby & tokyo-cabinet backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatibility and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a cinch!}
  gem.email = "roja@arbia.co.uk"
  gem.homepage = "http://github.com/roja/words"
  gem.authors = ["Roja Buck"]
  gem.executables = [ "build_wordnet" ]
  gem.default_executable = "build_wordnet"
  gem.rubyforge_project = 'words'
  gem.add_development_dependency "rspec", ">= 1.2.9"
end
|
19
|
+
Jeweler::GemcutterTasks.new
|
22
20
|
rescue LoadError
|
23
|
-
|
21
|
+
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
24
22
|
end
|
25
23
|
|
26
|
-
require 'rake/
|
27
|
-
Rake::
|
28
|
-
|
29
|
-
|
30
|
-
test.verbose = true
|
24
|
+
require 'spec/rake/spectask'
|
25
|
+
Spec::Rake::SpecTask.new(:spec) do |spec|
|
26
|
+
spec.libs << 'lib' << 'spec'
|
27
|
+
spec.spec_files = FileList['spec/**/*_spec.rb']
|
31
28
|
end
|
32
29
|
|
33
30
|
begin
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
31
|
+
require 'rcov'
|
32
|
+
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
33
|
+
spec.libs << 'lib' << 'spec'
|
34
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
35
|
+
spec.rcov = true
|
36
|
+
end
|
40
37
|
rescue LoadError
|
41
|
-
|
42
|
-
|
43
|
-
|
38
|
+
task :rcov do
|
39
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install rcov"
|
40
|
+
end
|
44
41
|
end
|
45
42
|
|
46
|
-
task :
|
43
|
+
task :spec => :check_dependencies
|
47
44
|
|
48
|
-
task :default => :
|
45
|
+
task :default => :spec
|
49
46
|
|
50
47
|
require 'rake/rdoctask'
|
51
48
|
# RDoc task: documents the README and every Ruby file under lib/ into ./rdoc.
Rake::RDocTask.new do |rdoc|
  # Fall back to an empty version when no VERSION file exists; chomp so that a
  # trailing newline in the file does not leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').chomp : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "words #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.
|
1
|
+
0.4.1
|
data/bin/build_wordnet
CHANGED
@@ -7,9 +7,11 @@ require 'pathname'
|
|
7
7
|
require 'rubygems'
|
8
8
|
|
9
9
|
# standard library includes
|
10
|
-
require 'trollop'
|
10
|
+
#require 'trollop'
|
11
11
|
require 'zlib'
|
12
12
|
require 'net/http'
|
13
|
+
require 'optparse'
|
14
|
+
require 'pp'
|
13
15
|
|
14
16
|
# local includes
|
15
17
|
require File.join(File.dirname(__FILE__), '..', 'lib', 'words.rb')
|
@@ -19,16 +21,50 @@ POS_FILE_TYPE_TO_SHORT = { 'adj' => 'a', 'adv' => 'r', 'noun' => 'n', 'verb' =>
|
|
19
21
|
|
20
22
|
puts "Words Dataset Constructor 2010 (c) Roja Buck"
|
21
23
|
|
22
|
-
opts =
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
opts = { :quiet => false, :build_tokyo => false, :build_tokyo_with_evocations => false, :build_pure_evocations => false, :wordnet => 'Search...' }
|
25
|
+
|
26
|
+
optparse = OptionParser.new do|option|
|
27
|
+
|
28
|
+
option.on( '-q', '--quiet', "Don't output verbose program detail. (Default: false)" ) do
|
29
|
+
opts[:quiet] = true
|
30
|
+
end
|
31
|
+
|
32
|
+
option.on( '-w', '--wordnet FILE', "Location of the wordnet dictionary directory. (Default: Search)" ) do|f|
|
33
|
+
opts[:wordnet] = f
|
34
|
+
end
|
35
|
+
|
36
|
+
option.on( '-t', '--build-tokyo', "Build the tokyo wordnet dataset? (Default: false)" ) do
|
37
|
+
opts[:build_tokyo] = true
|
38
|
+
end
|
39
|
+
|
40
|
+
option.on( '-x', '--build-tokyo-with-evocations', "Build the tokyo dataset with the similarity dataset based on the wordnet evocation project? (Default: false) NOTE: requires internet connection." ) do
|
41
|
+
opts[:build_tokyo_with_evocations] = true
|
42
|
+
end
|
43
|
+
|
44
|
+
option.on( '-e', '--build-pure-evocations', "Build the similarity dataset based on the wordnet evocation project for use with the pure words mode. (Default: false) NOTE: requires internet connection." ) do
|
45
|
+
opts[:build_pure_evocations] = true
|
46
|
+
end
|
47
|
+
|
48
|
+
option.on( '-h', '--help', 'Display this screen' ) do
|
49
|
+
puts option
|
50
|
+
exit
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
optparse.parse!
|
56
|
+
|
57
|
+
# At least one build target must be requested. Use abort so the message goes to
# stderr and the process exits with a failure status that calling scripts can detect.
unless opts[:build_tokyo] || opts[:build_tokyo_with_evocations] || opts[:build_pure_evocations]
  abort "ERROR: You need to specify at least one dataset you want to build."
end
|
29
|
-
Trollop::die :build_tokyo, "You need to specify which dataset you want to build." if !opts[:build_tokyo] && !opts[:build_tokyo_with_evocations] && !opts[:build_pure_evocations]
|
30
61
|
puts "Verbose mode enabled" if (VERBOSE = !opts[:quiet])
|
31
62
|
|
63
|
+
# Echo the parsed options in verbose mode only. Deliberately no `exit` here:
# the dataset build steps below must still run.
pp "Options:", opts if VERBOSE
|
66
|
+
|
67
|
+
|
32
68
|
require 'rufus-tokyo' if opts[:build_tokyo] || opts[:build_tokyo_with_evocations]
|
33
69
|
|
34
70
|
gem_path = Pathname.new "#{File.dirname(__FILE__)}/.."
|
data/examples.rb
CHANGED
data/lib/evocations.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
# local includes
|
2
4
|
require File.join(File.dirname(__FILE__), 'synset.rb')
|
3
5
|
|
@@ -7,9 +9,7 @@ module Words
|
|
7
9
|
|
8
10
|
def initialize(evocation_construct, source_synset, wordnet_connection)
|
9
11
|
|
10
|
-
@wordnet_connection = wordnet_connection
|
11
|
-
@source = source_synset
|
12
|
-
@evocation_construct = evocation_construct
|
12
|
+
@evocation_construct, @source, @wordnet_connection = evocation_construct, source_synset, wordnet_connection
|
13
13
|
|
14
14
|
end
|
15
15
|
|
@@ -17,16 +17,12 @@ module Words
|
|
17
17
|
|
18
18
|
@means ||= @evocation_construct["means"].split('|')
|
19
19
|
|
20
|
-
@means
|
21
|
-
|
22
20
|
end
|
23
21
|
|
24
22
|
def medians
|
25
23
|
|
26
24
|
@medians ||= @evocation_construct["medians"].split('|')
|
27
25
|
|
28
|
-
@medians
|
29
|
-
|
30
26
|
end
|
31
27
|
|
32
28
|
def size
|
data/lib/homographs.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
# local includes
|
2
4
|
require File.join(File.dirname(__FILE__), 'synset.rb')
|
3
5
|
|
@@ -35,32 +37,24 @@ module Words
|
|
35
37
|
|
36
38
|
@tagsense_counts ||= @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } }
|
37
39
|
|
38
|
-
@tagsense_counts
|
39
|
-
|
40
40
|
end
|
41
41
|
|
42
42
|
def lemma
|
43
43
|
|
44
44
|
@lemma ||= @raw_homographs["lemma"].gsub('_', ' ')
|
45
45
|
|
46
|
-
@lemma
|
47
|
-
|
48
46
|
end
|
49
47
|
|
50
48
|
def available_pos
|
51
49
|
|
52
50
|
@available_pos ||= synset_ids.map { |synset_id| POS_TO_SYMBOL[synset_id[0,1]] }.uniq
|
53
51
|
|
54
|
-
@available_pos
|
55
|
-
|
56
52
|
end
|
57
53
|
|
58
54
|
def to_s
|
59
55
|
|
60
56
|
@to_s ||= [lemma, " " + available_pos.join("/")].join(",")
|
61
57
|
|
62
|
-
@to_s
|
63
|
-
|
64
58
|
end
|
65
59
|
|
66
60
|
def size(pos = :all)
|
data/lib/relation.rb
CHANGED
@@ -1,91 +1,90 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
# local includes
|
2
4
|
require File.join(File.dirname(__FILE__), 'synset.rb')
|
3
5
|
|
4
6
|
module Words
|
5
7
|
|
6
|
-
|
8
|
+
class Relation
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
|
11
|
+
";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
|
12
|
+
"-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
|
13
|
+
"%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
|
14
|
+
"\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
|
15
|
+
SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert
|
14
16
|
|
15
|
-
|
17
|
+
def initialize(relation_construct, source_synset, wordnet_connection)
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
@wordnet_connection = wordnet_connection
|
20
|
+
@symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
|
21
|
+
@dest_synset_id = @pos + @dest_synset_id
|
22
|
+
@symbol = RELATION_TO_SYMBOL[@symbol]
|
23
|
+
@source_synset = source_synset
|
22
24
|
|
23
|
-
|
25
|
+
end
|
24
26
|
|
25
|
-
|
27
|
+
def is_semantic?
|
26
28
|
|
27
|
-
|
29
|
+
@source_dest == "0000"
|
28
30
|
|
29
|
-
|
31
|
+
end
|
30
32
|
|
31
|
-
|
33
|
+
def source_word
|
32
34
|
|
33
|
-
|
35
|
+
return nil if is_semantic?
|
36
|
+
@source_word ||= @source_synset.words[@source_dest[0..1].to_i(16)-1]
|
34
37
|
|
35
|
-
|
38
|
+
end
|
36
39
|
|
37
|
-
|
40
|
+
def destination_word
|
38
41
|
|
39
|
-
|
42
|
+
return nil if is_semantic?
|
43
|
+
@destination_word ||= destination.words[@source_dest[2..3].to_i(16)-1]
|
40
44
|
|
41
|
-
|
45
|
+
end
|
42
46
|
|
43
|
-
|
47
|
+
# Tests whether this relation is of the given type.
#
# @param type [Symbol, String] a relation name (e.g. :hypernym or "hypernym")
#   or a raw pointer symbol that is a key of RELATION_TO_SYMBOL (e.g. "@")
# @return [Boolean] true when the type denotes this relation's type, false
#   otherwise (including for types found in neither lookup table)
def relation_type?(type)
  case
  when SYMBOL_TO_RELATION.include?(type.to_sym)
    type.to_sym == @symbol
  when RELATION_TO_SYMBOL.include?(type.to_s)
    # raw pointer symbol: translate it to its relation name before comparing
    RELATION_TO_SYMBOL[type.to_s] == @symbol
  else
    false
  end
end
|
48
59
|
|
49
|
-
|
50
|
-
when SYMBOL_TO_RELATION.include?(type.to_sym)
|
51
|
-
type.to_sym == @symbol
|
52
|
-
when RELATION_TO_SYMBOL.include?(pos.to_s)
|
53
|
-
POINTER_TO_SYMBOL[type.to_sym] == @symbol
|
54
|
-
else
|
55
|
-
false
|
56
|
-
end
|
60
|
+
def relation_type
|
57
61
|
|
58
|
-
|
62
|
+
@symbol
|
59
63
|
|
60
|
-
|
64
|
+
end
|
61
65
|
|
62
|
-
|
66
|
+
def destination
|
63
67
|
|
64
|
-
|
68
|
+
@destination ||= Synset.new(@dest_synset_id, @wordnet_connection, nil)
|
65
69
|
|
66
|
-
|
70
|
+
end
|
67
71
|
|
68
|
-
|
72
|
+
def to_s
|
69
73
|
|
70
|
-
|
74
|
+
if is_semantic?
|
75
|
+
@to_s ||= "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}"
|
76
|
+
else
|
77
|
+
@to_s ||= "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\""
|
78
|
+
end
|
71
79
|
|
72
|
-
|
80
|
+
end
|
73
81
|
|
74
|
-
|
82
|
+
def inspect
|
75
83
|
|
76
|
-
|
77
|
-
@to_s = "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}" if is_semantic? && !defined?(@to_s)
|
78
|
-
|
79
|
-
@to_s
|
80
|
-
|
81
|
-
end
|
82
|
-
|
83
|
-
def inspect
|
84
|
-
|
85
|
-
{ :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
|
84
|
+
{ :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
|
86
85
|
|
87
|
-
|
86
|
+
end
|
88
87
|
|
89
|
-
|
88
|
+
end
|
90
89
|
|
91
90
|
end
|
data/lib/synset.rb
CHANGED
@@ -1,199 +1,201 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
# local includes
|
2
4
|
require File.join(File.dirname(__FILE__), 'relation.rb')
|
3
5
|
require File.join(File.dirname(__FILE__), 'evocations.rb')
|
4
6
|
|
5
7
|
module Words
|
6
8
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
def initialize(synset_id, wordnet_connection, homographs)
|
58
|
-
|
59
|
-
@wordnet_connection = wordnet_connection
|
60
|
-
@synset_hash = wordnet_connection.synset(synset_id)
|
61
|
-
@homographs = homographs
|
62
|
-
|
63
|
-
# construct some convenience methods for relation type access
|
64
|
-
Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
|
65
|
-
self.class.send(:define_method, "#{relation_type}s?") do
|
66
|
-
relations(relation_type).size > 0
|
67
|
-
end
|
68
|
-
self.class.send(:define_method, "#{relation_type}s") do
|
69
|
-
relations(relation_type)
|
70
|
-
end
|
71
|
-
end
|
9
|
+
class Synset
|
10
|
+
|
11
|
+
SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
|
12
|
+
SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
|
13
|
+
NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
|
14
|
+
{ :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
|
15
|
+
{ :lex => :adv_all, :description => "all adverbs" },
|
16
|
+
{ :lex => :noun_Tops, :description => "unique beginner for nouns" },
|
17
|
+
{ :lex => :noun_act, :description => "nouns denoting acts or actions" },
|
18
|
+
{ :lex => :noun_animal, :description => "nouns denoting animals" },
|
19
|
+
{ :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
|
20
|
+
{ :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
|
21
|
+
{ :lex => :noun_body, :description => "nouns denoting body parts" },
|
22
|
+
{ :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
|
23
|
+
{ :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
|
24
|
+
{ :lex => :noun_event, :description => "nouns denoting natural events" },
|
25
|
+
{ :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
|
26
|
+
{ :lex => :noun_food, :description => "nouns denoting foods and drinks" },
|
27
|
+
{ :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
|
28
|
+
{ :lex => :noun_location, :description => "nouns denoting spatial position" },
|
29
|
+
{ :lex => :noun_motive, :description => "nouns denoting goals" },
|
30
|
+
{ :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
|
31
|
+
{ :lex => :noun_person, :description => "nouns denoting people" },
|
32
|
+
{ :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
|
33
|
+
{ :lex => :noun_plant, :description => "nouns denoting plants" },
|
34
|
+
{ :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
|
35
|
+
{ :lex => :noun_process, :description => "nouns denoting natural processes" },
|
36
|
+
{ :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
|
37
|
+
{ :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
|
38
|
+
{ :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
|
39
|
+
{ :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
|
40
|
+
{ :lex => :noun_substance, :description => "nouns denoting substances" },
|
41
|
+
{ :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
|
42
|
+
{ :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
|
43
|
+
{ :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
|
44
|
+
{ :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
|
45
|
+
{ :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
|
46
|
+
{ :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
|
47
|
+
{ :lex => :verb_consumption, :description => "verbs of eating and drinking" },
|
48
|
+
{ :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
|
49
|
+
{ :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
|
50
|
+
{ :lex => :verb_emotion, :description => "verbs of feeling" },
|
51
|
+
{ :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
|
52
|
+
{ :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
|
53
|
+
{ :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
|
54
|
+
{ :lex => :verb_social, :description => "verbs of political and social activities and events" },
|
55
|
+
{ :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
|
56
|
+
{ :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
|
57
|
+
{ :lex => :adj_ppl, :description => "participial adjectives" } ]
|
72
58
|
|
73
|
-
|
59
|
+
def initialize(synset_id, wordnet_connection, homographs)
|
74
60
|
|
75
|
-
|
61
|
+
@wordnet_connection = wordnet_connection
|
62
|
+
@synset_hash = wordnet_connection.synset(synset_id)
|
63
|
+
@homographs = homographs
|
76
64
|
|
77
|
-
|
65
|
+
# construct some convenience methods for relation type access
|
66
|
+
Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
|
67
|
+
self.class.send(:define_method, "#{relation_type}s?") do
|
68
|
+
relations(relation_type).size > 0
|
69
|
+
end
|
70
|
+
self.class.send(:define_method, "#{relation_type}s") do
|
71
|
+
relations(relation_type)
|
72
|
+
end
|
73
|
+
end
|
78
74
|
|
79
|
-
|
75
|
+
end
|
80
76
|
|
81
|
-
|
77
|
+
def synset_type
|
82
78
|
|
83
|
-
|
79
|
+
SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
|
84
80
|
|
85
|
-
|
81
|
+
end
|
86
82
|
|
87
|
-
|
83
|
+
def words
|
88
84
|
|
89
|
-
|
85
|
+
@words ||= map_from_words_with_lexical_ids(:word)
|
90
86
|
|
91
|
-
|
87
|
+
end
|
92
88
|
|
93
|
-
|
89
|
+
# Lexical ids of the words in this synset, in the same order as #words.
#
# Memoised in its own instance variable: sharing @words with #words would make
# whichever of the two is called first determine what both return.
#
# @return [Array] the :lexical_id entry of each word in words_with_lexical_ids
def lexical_ids
  @lexical_ids ||= map_from_words_with_lexical_ids(:lexical_id)
end
|
98
94
|
|
99
|
-
|
95
|
+
def size
|
100
96
|
|
101
|
-
|
97
|
+
words.size
|
102
98
|
|
103
|
-
|
99
|
+
end
|
104
100
|
|
105
|
-
|
101
|
+
def words_with_lexical_ids
|
106
102
|
|
107
|
-
|
103
|
+
@words_with_num ||= @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } }
|
108
104
|
|
109
|
-
|
105
|
+
end
|
110
106
|
|
111
|
-
|
107
|
+
def lexical_filenum
|
112
108
|
|
113
|
-
|
109
|
+
@synset_hash["lexical_filenum"]
|
114
110
|
|
115
|
-
|
111
|
+
end
|
116
112
|
|
117
|
-
|
113
|
+
def lexical_catagory
|
118
114
|
|
119
|
-
|
115
|
+
lexical[:lex]
|
120
116
|
|
121
|
-
|
117
|
+
end
|
122
118
|
|
123
|
-
|
119
|
+
def lexical_description
|
124
120
|
|
125
|
-
|
121
|
+
lexical[:description]
|
126
122
|
|
127
|
-
|
123
|
+
end
|
128
124
|
|
129
|
-
|
125
|
+
def lexical
|
130
126
|
|
131
|
-
|
127
|
+
NUM_TO_LEX[lexical_filenum.to_i]
|
132
128
|
|
133
|
-
|
129
|
+
end
|
134
130
|
|
135
|
-
|
131
|
+
def synset_id
|
136
132
|
|
137
|
-
|
133
|
+
@synset_hash["synset_id"]
|
138
134
|
|
139
|
-
|
135
|
+
end
|
140
136
|
|
141
|
-
|
137
|
+
def gloss
|
142
138
|
|
143
|
-
|
139
|
+
@synset_hash["gloss"]
|
144
140
|
|
145
|
-
|
141
|
+
end
|
146
142
|
|
147
|
-
|
143
|
+
def lemma
|
148
144
|
|
149
|
-
|
145
|
+
@homographs.lemma
|
150
146
|
|
151
|
-
|
147
|
+
end
|
152
148
|
|
153
|
-
|
149
|
+
def homographs
|
154
150
|
|
155
|
-
|
151
|
+
@homographs
|
156
152
|
|
157
|
-
|
153
|
+
end
|
158
154
|
|
159
|
-
|
155
|
+
def inspect
|
160
156
|
|
161
|
-
|
157
|
+
@synset_hash.inspect
|
162
158
|
|
163
|
-
|
159
|
+
end
|
164
160
|
|
165
|
-
|
161
|
+
def relations(type = :all)
|
166
162
|
|
167
|
-
|
163
|
+
@relations ||= @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) }
|
168
164
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
165
|
+
case
|
166
|
+
when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
|
167
|
+
@relations.select { |relation| relation.relation_type == type.to_sym }
|
168
|
+
when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
|
169
|
+
@relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
|
170
|
+
else
|
171
|
+
@relations
|
172
|
+
end
|
177
173
|
|
178
|
-
|
174
|
+
end
|
179
175
|
|
180
|
-
|
176
|
+
def evocations
|
181
177
|
|
182
|
-
|
183
|
-
|
178
|
+
evocations_arr = @wordnet_connection.evocations(synset_id)
|
179
|
+
Evocations.new(evocations_arr, self, @wordnet_connection) unless evocations_arr.nil?
|
184
180
|
|
185
|
-
|
181
|
+
end
|
186
182
|
|
187
|
-
|
183
|
+
def to_s
|
188
184
|
|
189
|
-
|
185
|
+
@to_s ||= "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}"
|
190
186
|
|
191
|
-
|
187
|
+
end
|
192
188
|
|
193
|
-
|
189
|
+
alias word lemma
|
190
|
+
|
191
|
+
private
|
194
192
|
|
195
|
-
|
193
|
+
def map_from_words_with_lexical_ids(value)
|
196
194
|
|
197
|
-
|
195
|
+
words_with_lexical_ids.map { |word_with_num| word_with_num[value] }
|
196
|
+
|
197
|
+
end
|
198
|
+
|
199
|
+
end
|
198
200
|
|
199
201
|
end
|