words 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -6,7 +6,9 @@ Words implements a fast interface to [Wordnet®](http://wordnet.princeton.edu) w
6
6
 
7
7
  * Version 0.2 Introduced Pure Ruby Backend
8
8
  * Version 0.3 Introduced Evocation Support (see examples & below) as developed by the [Wordnet® Evocation Project](http://wordnet.cs.princeton.edu/downloads/evocation/release-0.4/README.TXT)
9
- * Version 0.4 Substantial performance increase in pure mode (now faster at some things than the tokyo backend) and simplification of use! Full refactoring. Move to RSpec for testing.
9
+ * Version 0.4 Substantial performance increase in pure mode (now faster at some things than the tokyo backend) and simplification of use! Full refactoring. Move to RSpec for testing. API CHANGES: Words::Words -> Words::Wordnet, close -> close!, connected -> connected? and evocations_enabled? -> evocations?
10
+
11
+ Documentation: [Yardoc Live](http://yardoc.org/docs/roja-words)
10
12
 
11
13
  ## Pre-Installation ##
12
14
 
@@ -91,7 +93,7 @@ Heres a few little examples of using words within your programs.
91
93
  require 'rubygems'
92
94
  require 'words'
93
95
 
94
- data = Words::Wordnet.new # or: data = Words::Words.new(:tokyo) for the tokyo backend
96
+ data = Words::Wordnet.new # or: data = Words::Wordnet.new(:tokyo) for the tokyo backend
95
97
 
96
98
  # to specify a wordnet path Words::Words.new(:pure, '/path/to/wordnet')
97
99
  # to specify the tokyo dataset Words::Words.new(:pure, :search, '/path/to/data.tct')
data/Rakefile CHANGED
@@ -1,58 +1,55 @@
1
+ # coding: utf-8
1
2
  require 'rubygems'
2
3
  require 'rake'
3
4
 
4
5
  begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "words"
8
- gem.summary = %Q{A Fast & Easy to use interface to WordNet® with cross ruby distribution compatability.}
9
- gem.description = %Q{Words, with both pure ruby & tokyo-cabinate backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatability and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a sintch!}
10
- gem.email = "roja@arbia.co.uk"
11
- gem.homepage = "http://github.com/roja/words"
12
- gem.authors = ["Roja Buck"]
13
- gem.add_dependency "trollop", ">= 1.15"
14
- gem.executables = [ "build_wordnet" ]
15
- gem.default_executable = "build_wordnet"
16
- gem.rubyforge_project = 'words'
17
- end
18
- Jeweler::GemcutterTasks.new
19
- Jeweler::RubyforgeTasks.new do |rubyforge|
20
- rubyforge.doc_task = "rdoc"
21
- end
6
+ require 'jeweler'
7
+ Jeweler::Tasks.new do |gem|
8
+ gem.name = "words"
9
+ gem.summary = %Q{A Fast & Easy to use interface to WordNet® with cross ruby distribution compatibility.}
10
+ gem.description = %Q{Words, with both pure ruby & tokyo-cabinet backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatibility and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a cinch!}
11
+ gem.email = "roja@arbia.co.uk"
12
+ gem.homepage = "http://github.com/roja/words"
13
+ gem.authors = ["Roja Buck"]
14
+ gem.executables = [ "build_wordnet" ]
15
+ gem.default_executable = "build_wordnet"
16
+ gem.rubyforge_project = 'words'
17
+ gem.add_development_dependency "rspec", ">= 1.2.9"
18
+ end
19
+ Jeweler::GemcutterTasks.new
22
20
  rescue LoadError
23
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
21
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
24
22
  end
25
23
 
26
- require 'rake/testtask'
27
- Rake::TestTask.new(:test) do |test|
28
- test.libs << 'lib' << 'test'
29
- test.pattern = 'test/**/test_*.rb'
30
- test.verbose = true
24
+ require 'spec/rake/spectask'
25
+ Spec::Rake::SpecTask.new(:spec) do |spec|
26
+ spec.libs << 'lib' << 'spec'
27
+ spec.spec_files = FileList['spec/**/*_spec.rb']
31
28
  end
32
29
 
33
30
  begin
34
- require 'rcov/rcovtask'
35
- Rcov::RcovTask.new do |test|
36
- test.libs << 'test'
37
- test.pattern = 'test/**/test_*.rb'
38
- test.verbose = true
39
- end
31
+ require 'rcov'
32
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
33
+ spec.libs << 'lib' << 'spec'
34
+ spec.pattern = 'spec/**/*_spec.rb'
35
+ spec.rcov = true
36
+ end
40
37
  rescue LoadError
41
- task :rcov do
42
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
43
- end
38
+ task :rcov do
39
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install rcov"
40
+ end
44
41
  end
45
42
 
46
- task :test => :check_dependencies
43
+ task :spec => :check_dependencies
47
44
 
48
- task :default => :test
45
+ task :default => :spec
49
46
 
50
47
  require 'rake/rdoctask'
51
48
  Rake::RDocTask.new do |rdoc|
52
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
53
-
54
- rdoc.rdoc_dir = 'rdoc'
55
- rdoc.title = "words #{version}"
56
- rdoc.rdoc_files.include('README*')
57
- rdoc.rdoc_files.include('lib/**/*.rb')
58
- end
49
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
50
+
51
+ rdoc.rdoc_dir = 'rdoc'
52
+ rdoc.title = "words #{version}"
53
+ rdoc.rdoc_files.include('README*')
54
+ rdoc.rdoc_files.include('lib/**/*.rb')
55
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.0
1
+ 0.4.1
data/bin/build_wordnet CHANGED
@@ -7,9 +7,11 @@ require 'pathname'
7
7
  require 'rubygems'
8
8
 
9
9
  # standard library includes
10
- require 'trollop'
10
+ #require 'trollop'
11
11
  require 'zlib'
12
12
  require 'net/http'
13
+ require 'optparse'
14
+ require 'pp'
13
15
 
14
16
  # local includes
15
17
  require File.join(File.dirname(__FILE__), '..', 'lib', 'words.rb')
@@ -19,16 +21,50 @@ POS_FILE_TYPE_TO_SHORT = { 'adj' => 'a', 'adv' => 'r', 'noun' => 'n', 'verb' =>
19
21
 
20
22
  puts "Words Dataset Constructor 2010 (c) Roja Buck"
21
23
 
22
- opts = Trollop::options do
23
- opt :quiet, "Don't output verbose program detail.", :default => false
24
- opt :wordnet, "Location of the wordnet dictionary directory", :default => "Search..."
25
- opt :build_tokyo, "Build the tokyo wordnet dataset?", :default => false
26
- opt :build_tokyo_with_evocations, "Build the tokyo dataset with the similarity dataset based on the wordnet evocation project (requires internet connection)?", :default => false
27
- opt :build_pure_evocations, "Build the similarity dataset based on the wordnet evocation project for use with the pure words mode (requires internet connection)", :default => false
24
+ opts = { :quiet => false, :build_tokyo => false, :build_tokyo_with_evocations => false, :build_pure_evocations => false, :wordnet => 'Search...' }
25
+
26
+ optparse = OptionParser.new do |option|
27
+
28
+ option.on( '-q', '--quiet', "Don't output verbose program detail. (Default: false)" ) do
29
+ opts[:quiet] = true
30
+ end
31
+
32
+ option.on( '-w', '--wordnet FILE', "Location of the wordnet dictionary directory. (Default: Search)" ) do |f|
33
+ opts[:wordnet] = f
34
+ end
35
+
36
+ option.on( '-t', '--build-tokyo', "Build the tokyo wordnet dataset? (Default: false)" ) do
37
+ opts[:build_tokyo] = true
38
+ end
39
+
40
+ option.on( '-x', '--build-tokyo-with-evocations', "Build the tokyo dataset with the similarity dataset based on the wordnet evocation project? (Default: false) NOTE: requires internet connection." ) do
41
+ opts[:build_tokyo_with_evocations] = true
42
+ end
43
+
44
+ option.on( '-e', '--build-pure-evocations', "Build the similarity dataset based on the wordnet evocation project for use with the pure words mode. (Default: false) NOTE: requires internet connection." ) do
45
+ opts[:build_pure_evocations] = true
46
+ end
47
+
48
+ option.on( '-h', '--help', 'Display this screen' ) do
49
+ puts option
50
+ exit
51
+ end
52
+
53
+ end
54
+
55
+ optparse.parse!
56
+
57
+ if !opts[:build_tokyo] && !opts[:build_tokyo_with_evocations] && !opts[:build_pure_evocations]
58
+ puts "ERROR: You need to specify at least one dataset you want to build."
59
+ exit
28
60
  end
29
- Trollop::die :build_tokyo, "You need to specify which dataset you want to build." if !opts[:build_tokyo] && !opts[:build_tokyo_with_evocations] && !opts[:build_pure_evocations]
30
61
  puts "Verbose mode enabled" if (VERBOSE = !opts[:quiet])
31
62
 
63
+ pp "Options:", opts if VERBOSE # debug dump of parsed options; only show in verbose mode
64
+
65
+ # exit # NOTE(review): debug leftover removed — this unconditional exit aborted the script before any dataset was built
66
+
67
+
32
68
  require 'rufus-tokyo' if opts[:build_tokyo] || opts[:build_tokyo_with_evocations]
33
69
 
34
70
  gem_path = Pathname.new "#{File.dirname(__FILE__)}/.."
data/examples.rb CHANGED
@@ -1,8 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
+ # coding: utf-8
2
3
 
3
- require 'rubygems'
4
- require 'words'
5
- #require 'lib/words.rb'
4
+ #require 'rubygems'
5
+ #require 'words'
6
+ require 'lib/words.rb'
6
7
 
7
8
  if __FILE__ == $0
8
9
 
data/lib/evocations.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # coding: utf-8
2
+
1
3
  # local includes
2
4
  require File.join(File.dirname(__FILE__), 'synset.rb')
3
5
 
@@ -7,9 +9,7 @@ module Words
7
9
 
8
10
  def initialize(evocation_construct, source_synset, wordnet_connection)
9
11
 
10
- @wordnet_connection = wordnet_connection
11
- @source = source_synset
12
- @evocation_construct = evocation_construct
12
+ @evocation_construct, @source, @wordnet_connection = evocation_construct, source_synset, wordnet_connection
13
13
 
14
14
  end
15
15
 
@@ -17,16 +17,12 @@ module Words
17
17
 
18
18
  @means ||= @evocation_construct["means"].split('|')
19
19
 
20
- @means
21
-
22
20
  end
23
21
 
24
22
  def medians
25
23
 
26
24
  @medians ||= @evocation_construct["medians"].split('|')
27
25
 
28
- @medians
29
-
30
26
  end
31
27
 
32
28
  def size
data/lib/homographs.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # coding: utf-8
2
+
1
3
  # local includes
2
4
  require File.join(File.dirname(__FILE__), 'synset.rb')
3
5
 
@@ -35,32 +37,24 @@ module Words
35
37
 
36
38
  @tagsense_counts ||= @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } }
37
39
 
38
- @tagsense_counts
39
-
40
40
  end
41
41
 
42
42
  def lemma
43
43
 
44
44
  @lemma ||= @raw_homographs["lemma"].gsub('_', ' ')
45
45
 
46
- @lemma
47
-
48
46
  end
49
47
 
50
48
  def available_pos
51
49
 
52
50
  @available_pos ||= synset_ids.map { |synset_id| POS_TO_SYMBOL[synset_id[0,1]] }.uniq
53
51
 
54
- @available_pos
55
-
56
52
  end
57
53
 
58
54
  def to_s
59
55
 
60
56
  @to_s ||= [lemma, " " + available_pos.join("/")].join(",")
61
57
 
62
- @to_s
63
-
64
58
  end
65
59
 
66
60
  def size(pos = :all)
data/lib/relation.rb CHANGED
@@ -1,91 +1,90 @@
1
+ # coding: utf-8
2
+
1
3
  # local includes
2
4
  require File.join(File.dirname(__FILE__), 'synset.rb')
3
5
 
4
6
  module Words
5
7
 
6
- class Relation
8
+ class Relation
7
9
 
8
- RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
9
- ";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
10
- "-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
11
- "%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
12
- "\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
13
- SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert
10
+ RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
11
+ ";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
12
+ "-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
13
+ "%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
14
+ "\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
15
+ SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert
14
16
 
15
- def initialize(relation_construct, source_synset, wordnet_connection)
17
+ def initialize(relation_construct, source_synset, wordnet_connection)
16
18
 
17
- @wordnet_connection = wordnet_connection
18
- @symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
19
- @dest_synset_id = @pos + @dest_synset_id
20
- @symbol = RELATION_TO_SYMBOL[@symbol]
21
- @source_synset = source_synset
19
+ @wordnet_connection = wordnet_connection
20
+ @symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
21
+ @dest_synset_id = @pos + @dest_synset_id
22
+ @symbol = RELATION_TO_SYMBOL[@symbol]
23
+ @source_synset = source_synset
22
24
 
23
- end
25
+ end
24
26
 
25
- def is_semantic?
27
+ def is_semantic?
26
28
 
27
- @source_dest == "0000"
29
+ @source_dest == "0000"
28
30
 
29
- end
31
+ end
30
32
 
31
- def source_word
33
+ def source_word
32
34
 
33
- is_semantic? ? @source_word = nil : @source_word = @source_synset.words[@source_dest[0..1].to_i(16)-1] unless defined? @source_word
35
+ return nil if is_semantic?
36
+ @source_word ||= @source_synset.words[@source_dest[0..1].to_i(16)-1]
34
37
 
35
- @source_word
38
+ end
36
39
 
37
- end
40
+ def destination_word
38
41
 
39
- def destination_word
42
+ return nil if is_semantic?
43
+ @destination_word ||= destination.words[@source_dest[2..3].to_i(16)-1]
40
44
 
41
- is_semantic? ? @destination_word = nil : @destination_word = destination.words[@source_dest[2..3].to_i(16)-1] unless defined? @destination_word
45
+ end
42
46
 
43
- @destination_word
47
+ def relation_type?(type)
44
48
 
45
- end
49
+ case
50
+ when SYMBOL_TO_RELATION.include?(type.to_sym)
51
+ type.to_sym == @symbol
52
+ when RELATION_TO_SYMBOL.include?(type.to_s)
53
+ RELATION_TO_SYMBOL[type.to_s] == @symbol
54
+ else
55
+ false
56
+ end
46
57
 
47
- def relation_type?(type)
58
+ end
48
59
 
49
- case
50
- when SYMBOL_TO_RELATION.include?(type.to_sym)
51
- type.to_sym == @symbol
52
- when RELATION_TO_SYMBOL.include?(pos.to_s)
53
- POINTER_TO_SYMBOL[type.to_sym] == @symbol
54
- else
55
- false
56
- end
60
+ def relation_type
57
61
 
58
- end
62
+ @symbol
59
63
 
60
- def relation_type
64
+ end
61
65
 
62
- @symbol
66
+ def destination
63
67
 
64
- end
68
+ @destination ||= Synset.new(@dest_synset_id, @wordnet_connection, nil)
65
69
 
66
- def destination
70
+ end
67
71
 
68
- @destination ||= Synset.new(@dest_synset_id, @wordnet_connection, nil)
72
+ def to_s
69
73
 
70
- @destination
74
+ if is_semantic?
75
+ @to_s ||= "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}"
76
+ else
77
+ @to_s ||= "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\""
78
+ end
71
79
 
72
- end
80
+ end
73
81
 
74
- def to_s
82
+ def inspect
75
83
 
76
- @to_s = "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\"" if !is_semantic? && !defined?(@to_s)
77
- @to_s = "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}" if is_semantic? && !defined?(@to_s)
78
-
79
- @to_s
80
-
81
- end
82
-
83
- def inspect
84
-
85
- { :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
84
+ { :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
86
85
 
87
- end
86
+ end
88
87
 
89
- end
88
+ end
90
89
 
91
90
  end
data/lib/synset.rb CHANGED
@@ -1,199 +1,201 @@
1
+ # coding: utf-8
2
+
1
3
  # local includes
2
4
  require File.join(File.dirname(__FILE__), 'relation.rb')
3
5
  require File.join(File.dirname(__FILE__), 'evocations.rb')
4
6
 
5
7
  module Words
6
8
 
7
- class Synset
8
-
9
- SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
10
- SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
11
- NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
12
- { :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
13
- { :lex => :adv_all, :description => "all adverbs" },
14
- { :lex => :noun_Tops, :description => "unique beginner for nouns" },
15
- { :lex => :noun_act, :description => "nouns denoting acts or actions" },
16
- { :lex => :noun_animal, :description => "nouns denoting animals" },
17
- { :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
18
- { :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
19
- { :lex => :noun_body, :description => "nouns denoting body parts" },
20
- { :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
21
- { :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
22
- { :lex => :noun_event, :description => "nouns denoting natural events" },
23
- { :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
24
- { :lex => :noun_food, :description => "nouns denoting foods and drinks" },
25
- { :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
26
- { :lex => :noun_location, :description => "nouns denoting spatial position" },
27
- { :lex => :noun_motive, :description => "nouns denoting goals" },
28
- { :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
29
- { :lex => :noun_person, :description => "nouns denoting people" },
30
- { :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
31
- { :lex => :noun_plant, :description => "nouns denoting plants" },
32
- { :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
33
- { :lex => :noun_process, :description => "nouns denoting natural processes" },
34
- { :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
35
- { :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
36
- { :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
37
- { :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
38
- { :lex => :noun_substance, :description => "nouns denoting substances" },
39
- { :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
40
- { :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
41
- { :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
42
- { :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
43
- { :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
44
- { :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
45
- { :lex => :verb_consumption, :description => "verbs of eating and drinking" },
46
- { :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
47
- { :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
48
- { :lex => :verb_emotion, :description => "verbs of feeling" },
49
- { :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
50
- { :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
51
- { :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
52
- { :lex => :verb_social, :description => "verbs of political and social activities and events" },
53
- { :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
54
- { :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
55
- { :lex => :adj_ppl, :description => "participial adjectives" } ]
56
-
57
- def initialize(synset_id, wordnet_connection, homographs)
58
-
59
- @wordnet_connection = wordnet_connection
60
- @synset_hash = wordnet_connection.synset(synset_id)
61
- @homographs = homographs
62
-
63
- # construct some conveniance menthods for relation type access
64
- Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
65
- self.class.send(:define_method, "#{relation_type}s?") do
66
- relations(relation_type).size > 0
67
- end
68
- self.class.send(:define_method, "#{relation_type}s") do
69
- relations(relation_type)
70
- end
71
- end
9
+ class Synset
10
+
11
+ SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
12
+ SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
13
+ NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
14
+ { :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
15
+ { :lex => :adv_all, :description => "all adverbs" },
16
+ { :lex => :noun_Tops, :description => "unique beginner for nouns" },
17
+ { :lex => :noun_act, :description => "nouns denoting acts or actions" },
18
+ { :lex => :noun_animal, :description => "nouns denoting animals" },
19
+ { :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
20
+ { :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
21
+ { :lex => :noun_body, :description => "nouns denoting body parts" },
22
+ { :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
23
+ { :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
24
+ { :lex => :noun_event, :description => "nouns denoting natural events" },
25
+ { :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
26
+ { :lex => :noun_food, :description => "nouns denoting foods and drinks" },
27
+ { :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
28
+ { :lex => :noun_location, :description => "nouns denoting spatial position" },
29
+ { :lex => :noun_motive, :description => "nouns denoting goals" },
30
+ { :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
31
+ { :lex => :noun_person, :description => "nouns denoting people" },
32
+ { :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
33
+ { :lex => :noun_plant, :description => "nouns denoting plants" },
34
+ { :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
35
+ { :lex => :noun_process, :description => "nouns denoting natural processes" },
36
+ { :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
37
+ { :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
38
+ { :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
39
+ { :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
40
+ { :lex => :noun_substance, :description => "nouns denoting substances" },
41
+ { :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
42
+ { :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
43
+ { :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
44
+ { :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
45
+ { :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
46
+ { :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
47
+ { :lex => :verb_consumption, :description => "verbs of eating and drinking" },
48
+ { :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
49
+ { :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
50
+ { :lex => :verb_emotion, :description => "verbs of feeling" },
51
+ { :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
52
+ { :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
53
+ { :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
54
+ { :lex => :verb_social, :description => "verbs of political and social activities and events" },
55
+ { :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
56
+ { :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
57
+ { :lex => :adj_ppl, :description => "participial adjectives" } ]
72
58
 
73
- end
59
+ def initialize(synset_id, wordnet_connection, homographs)
74
60
 
75
- def synset_type
61
+ @wordnet_connection = wordnet_connection
62
+ @synset_hash = wordnet_connection.synset(synset_id)
63
+ @homographs = homographs
76
64
 
77
- SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
65
+ # construct some conveniance menthods for relation type access
66
+ Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
67
+ self.class.send(:define_method, "#{relation_type}s?") do
68
+ relations(relation_type).size > 0
69
+ end
70
+ self.class.send(:define_method, "#{relation_type}s") do
71
+ relations(relation_type)
72
+ end
73
+ end
78
74
 
79
- end
75
+ end
80
76
 
81
- def words
77
+ def synset_type
82
78
 
83
- @words ||= words_with_lexical_ids.map { |word_with_num| word_with_num[:word] }
79
+ SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
84
80
 
85
- @words
81
+ end
86
82
 
87
- end
83
+ def words
88
84
 
89
- def lexical_ids
85
+ @words ||= map_from_words_with_lexical_ids(:word)
90
86
 
91
- @words ||= words_with_lexical_ids.map { |word_with_num| word_with_num[:lexical_id] }
87
+ end
92
88
 
93
- @words
89
+ def lexical_ids
94
90
 
95
- end
91
+ @lexical_ids ||= map_from_words_with_lexical_ids(:lexical_id)
96
92
 
97
- def size
93
+ end
98
94
 
99
- words.size
95
+ def size
100
96
 
101
- end
97
+ words.size
102
98
 
103
- def words_with_lexical_ids
99
+ end
104
100
 
105
- @words_with_num ||= @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } }
101
+ def words_with_lexical_ids
106
102
 
107
- @words_with_num
103
+ @words_with_num ||= @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } }
108
104
 
109
- end
105
+ end
110
106
 
111
- def lexical_filenum
107
+ def lexical_filenum
112
108
 
113
- @synset_hash["lexical_filenum"]
109
+ @synset_hash["lexical_filenum"]
114
110
 
115
- end
111
+ end
116
112
 
117
- def lexical_catagory
113
+ def lexical_catagory
118
114
 
119
- lexical[:lex]
115
+ lexical[:lex]
120
116
 
121
- end
117
+ end
122
118
 
123
- def lexical_description
119
+ def lexical_description
124
120
 
125
- lexical[:description]
121
+ lexical[:description]
126
122
 
127
- end
123
+ end
128
124
 
129
- def lexical
125
+ def lexical
130
126
 
131
- NUM_TO_LEX[lexical_filenum.to_i]
127
+ NUM_TO_LEX[lexical_filenum.to_i]
132
128
 
133
- end
129
+ end
134
130
 
135
- def synset_id
131
+ def synset_id
136
132
 
137
- @synset_hash["synset_id"]
133
+ @synset_hash["synset_id"]
138
134
 
139
- end
135
+ end
140
136
 
141
- def gloss
137
+ def gloss
142
138
 
143
- @synset_hash["gloss"]
139
+ @synset_hash["gloss"]
144
140
 
145
- end
141
+ end
146
142
 
147
- def lemma
143
+ def lemma
148
144
 
149
- @homographs.lemma
145
+ @homographs.lemma
150
146
 
151
- end
147
+ end
152
148
 
153
- def homographs
149
+ def homographs
154
150
 
155
- @homographs
151
+ @homographs
156
152
 
157
- end
153
+ end
158
154
 
159
- def inspect
155
+ def inspect
160
156
 
161
- @synset_hash.inspect
157
+ @synset_hash.inspect
162
158
 
163
- end
159
+ end
164
160
 
165
- def relations(type = :all)
161
+ def relations(type = :all)
166
162
 
167
- @relations ||= @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) }
163
+ @relations ||= @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) }
168
164
 
169
- case
170
- when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
171
- @relations.select { |relation| relation.relation_type == type.to_sym }
172
- when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
173
- @relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
174
- else
175
- @relations
176
- end
165
+ case
166
+ when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
167
+ @relations.select { |relation| relation.relation_type == type.to_sym }
168
+ when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
169
+ @relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
170
+ else
171
+ @relations
172
+ end
177
173
 
178
- end
174
+ end
179
175
 
180
- def evocations
176
+ def evocations
181
177
 
182
- evocations_arr = @wordnet_connection.evocations(synset_id)
183
- Evocations.new(evocations_arr, self, @wordnet_connection) unless evocations_arr.nil?
178
+ evocations_arr = @wordnet_connection.evocations(synset_id)
179
+ Evocations.new(evocations_arr, self, @wordnet_connection) unless evocations_arr.nil?
184
180
 
185
- end
181
+ end
186
182
 
187
- def to_s
183
+ def to_s
188
184
 
189
- @to_s ||= "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}"
185
+ @to_s ||= "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}"
190
186
 
191
- @to_s
187
+ end
192
188
 
193
- end
189
+ alias word lemma
190
+
191
+ private
194
192
 
195
- alias word lemma
193
+ def map_from_words_with_lexical_ids(value)
196
194
 
197
- end
195
+ words_with_lexical_ids.map { |word_with_num| word_with_num[value] }
196
+
197
+ end
198
+
199
+ end
198
200
 
199
201
  end