words 0.4.0 → 0.4.1

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
data/README.markdown CHANGED
@@ -6,7 +6,9 @@ Words implements a fast interface to [Wordnet®](http://wordnet.princeton.edu) w
 
  * Version 0.2 Introduced Pure Ruby Backend
  * Version 0.3 Introduced Evocation Support (see examples & below) as developed by the [Wordnet® Evocation Project](http://wordnet.cs.princeton.edu/downloads/evocation/release-0.4/README.TXT)
- * Version 0.4 Substantial performance increase in pure mode (now faster at some things than the tokyo backend) and simplification of use! Full refactoring. Move to RSpec for testing.
+ * Version 0.4 Substantial performance increase in pure mode (now faster at some things than the tokyo backend) and simplification of use! Full refactoring. Move to RSpec for testing. API CHANGES: Words::Words -> Words::Wordnet, close -> close!, connected -> connected? and evocations_enabled? -> evocations?
+
+ Documentation: [Yardoc Live](http://yardoc.org/docs/roja-words)
 
  ## Pre-Installation ##
 
@@ -91,7 +93,7 @@ Heres a few little examples of using words within your programs.
  require 'rubygems'
  require 'words'
 
- data = Words::Wordnet.new # or: data = Words::Words.new(:tokyo) for the tokyo backend
+ data = Words::Wordnet.new # or: data = Words::Wordnet.new(:tokyo) for the tokyo backend
 
  # to specify a wordnet path Words::Words.new(:pure, '/path/to/wordnet')
  # to specify the tokyo dataset Words::Words.new(:pure, :search, '/path/to/data.tct')
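
The API renames called out above amount to the following. A minimal usage sketch, assuming a WordNet install (or built index) is available as described under the README's Pre-Installation section:

    require 'rubygems'
    require 'words'

    data = Words::Wordnet.new    # pure backend; Words::Wordnet.new(:tokyo) for the tokyo backend

    puts data.connected?         # was `connected` before 0.4
    puts data.evocations?        # was `evocations_enabled?` before 0.4

    data.close!                  # was `close` before 0.4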
data/Rakefile CHANGED
@@ -1,58 +1,55 @@
+ # coding: utf-8
  require 'rubygems'
  require 'rake'
 
  begin
- require 'jeweler'
- Jeweler::Tasks.new do |gem|
- gem.name = "words"
- gem.summary = %Q{A Fast & Easy to use interface to WordNet® with cross ruby distribution compatability.}
- gem.description = %Q{Words, with both pure ruby & tokyo-cabinate backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatability and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a sintch!}
- gem.email = "roja@arbia.co.uk"
- gem.homepage = "http://github.com/roja/words"
- gem.authors = ["Roja Buck"]
- gem.add_dependency "trollop", ">= 1.15"
- gem.executables = [ "build_wordnet" ]
- gem.default_executable = "build_wordnet"
- gem.rubyforge_project = 'words'
- end
- Jeweler::GemcutterTasks.new
- Jeweler::RubyforgeTasks.new do |rubyforge|
- rubyforge.doc_task = "rdoc"
- end
+ require 'jeweler'
+ Jeweler::Tasks.new do |gem|
+ gem.name = "words"
+ gem.summary = %Q{A Fast & Easy to use interface to WordNet® with cross ruby distribution compatability.}
+ gem.description = %Q{Words, with both pure ruby & tokyo-cabinate backends, implements a fast interface to Wordnet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatability and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful api and installation is a sintch!}
+ gem.email = "roja@arbia.co.uk"
+ gem.homepage = "http://github.com/roja/words"
+ gem.authors = ["Roja Buck"]
+ gem.executables = [ "build_wordnet" ]
+ gem.default_executable = "build_wordnet"
+ gem.rubyforge_project = 'words'
+ gem.add_development_dependency "rspec", ">= 1.2.9"
+ end
+ Jeweler::GemcutterTasks.new
  rescue LoadError
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
  end
 
- require 'rake/testtask'
- Rake::TestTask.new(:test) do |test|
- test.libs << 'lib' << 'test'
- test.pattern = 'test/**/test_*.rb'
- test.verbose = true
+ require 'spec/rake/spectask'
+ Spec::Rake::SpecTask.new(:spec) do |spec|
+ spec.libs << 'lib' << 'spec'
+ spec.spec_files = FileList['spec/**/*_spec.rb']
  end
 
  begin
- require 'rcov/rcovtask'
- Rcov::RcovTask.new do |test|
- test.libs << 'test'
- test.pattern = 'test/**/test_*.rb'
- test.verbose = true
- end
+ require 'rcov'
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
+ spec.libs << 'lib' << 'spec'
+ spec.pattern = 'spec/**/*_spec.rb'
+ spec.rcov = true
+ end
  rescue LoadError
- task :rcov do
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
- end
+ task :rcov do
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install rcov"
+ end
  end
 
- task :test => :check_dependencies
+ task :spec => :check_dependencies
 
- task :default => :test
+ task :default => :spec
 
  require 'rake/rdoctask'
  Rake::RDocTask.new do |rdoc|
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
-
- rdoc.rdoc_dir = 'rdoc'
- rdoc.title = "words #{version}"
- rdoc.rdoc_files.include('README*')
- rdoc.rdoc_files.include('lib/**/*.rb')
- end
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
+
+ rdoc.rdoc_dir = 'rdoc'
+ rdoc.title = "test #{version}"
+ rdoc.rdoc_files.include('README*')
+ rdoc.rdoc_files.include('lib/**/*.rb')
+ end
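
With the default task now pointing at :spec, anything matching spec/**/*_spec.rb is picked up by rake (or rake spec). A minimal sketch of such a file, using only the renamed public API from the README; the file name is hypothetical and a usable WordNet dataset is assumed:

    # spec/words_spec.rb (hypothetical)
    require 'rubygems'
    require 'spec'
    require File.join(File.dirname(__FILE__), '..', 'lib', 'words.rb')

    describe Words::Wordnet do

      it "exposes the renamed connection methods" do
        wordnet = Words::Wordnet.new
        wordnet.should respond_to(:connected?)
        wordnet.should respond_to(:evocations?)
        wordnet.close!
      end

    end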
data/VERSION CHANGED
@@ -1 +1 @@
- 0.4.0
+ 0.4.1
data/bin/build_wordnet CHANGED
@@ -7,9 +7,11 @@ require 'pathname'
  require 'rubygems'
 
  # standard library includes
- require 'trollop'
+ #require 'trollop'
  require 'zlib'
  require 'net/http'
+ require 'optparse'
+ require 'pp'
 
  # local includes
  require File.join(File.dirname(__FILE__), '..', 'lib', 'words.rb')
@@ -19,16 +21,50 @@ POS_FILE_TYPE_TO_SHORT = { 'adj' => 'a', 'adv' => 'r', 'noun' => 'n', 'verb' =>
 
  puts "Words Dataset Constructor 2010 (c) Roja Buck"
 
- opts = Trollop::options do
- opt :quiet, "Don't output verbose program detail.", :default => false
- opt :wordnet, "Location of the wordnet dictionary directory", :default => "Search..."
- opt :build_tokyo, "Build the tokyo wordnet dataset?", :default => false
- opt :build_tokyo_with_evocations, "Build the tokyo dataset with the similarity dataset based on the wordnet evocation project (requires internet connection)?", :default => false
- opt :build_pure_evocations, "Build the similarity dataset based on the wordnet evocation project for use with the pure words mode (requires internet connection)", :default => false
+ opts = { :quiet => false, :build_tokyo => false, :build_tokyo_with_evocations => false, :build_pure_evocations => false, :wordnet => 'Search...' }
+
+ optparse = OptionParser.new do|option|
+
+ option.on( '-q', '--quiet', "Don't output verbose program detail. (Default: false)" ) do
+ opts[:quiet] = true
+ end
+
+ option.on( '-w', '--wordnet FILE', "Location of the wordnet dictionary directory. (Default: Search)" ) do|f|
+ opts[:wordnet] = f
+ end
+
+ option.on( '-t', '--build-tokyo', "Build the tokyo wordnet dataset? (Default: false)" ) do
+ opts[:build_tokyo] = true
+ end
+
+ option.on( '-x', '--build-tokyo-with-evocations', "Build the tokyo dataset with the similarity dataset based on the wordnet evocation project? (Default: false) NOTE: requires internet connection." ) do
+ opts[:build_tokyo_with_evocations] = true
+ end
+
+ option.on( '-e', '--build-pure-evocations', "Build the similarity dataset based on the wordnet evocation project for use with the pure words mode. (Default: false) NOTE: requires internet connection." ) do
+ opts[:build_pure_evocations] = true
+ end
+
+ option.on( '-h', '--help', 'Display this screen' ) do
+ puts option
+ exit
+ end
+
+ end
+
+ optparse.parse!
+
+ if !opts[:build_tokyo] && !opts[:build_tokyo_with_evocations] && !opts[:build_pure_evocations]
+ puts "ERROR: You need to specify at least one dataset you want to build."
+ exit
  end
- Trollop::die :build_tokyo, "You need to specify which dataset you want to build." if !opts[:build_tokyo] && !opts[:build_tokyo_with_evocations] && !opts[:build_pure_evocations]
  puts "Verbose mode enabled" if (VERBOSE = !opts[:quiet])
 
+ pp "Options:", opts
+
+ exit
+
+
  require 'rufus-tokyo' if opts[:build_tokyo] || opts[:build_tokyo_with_evocations]
 
  gem_path = Pathname.new "#{File.dirname(__FILE__)}/.."
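
For reference, the Trollop-to-OptionParser switch above follows the standard-library pattern of a defaults hash plus one on block per flag. A condensed, self-contained sketch of that pattern; flag names mirror the script above, and the actual dataset-building body is omitted:

    # Condensed illustration only -- not the full build_wordnet script.
    require 'optparse'

    opts = { :quiet => false, :build_tokyo => false, :wordnet => 'Search...' }

    OptionParser.new do |option|
      option.on('-q', '--quiet',        "Don't output verbose program detail.") { opts[:quiet] = true }
      option.on('-w', '--wordnet FILE', 'Location of the wordnet dictionary directory.') { |f| opts[:wordnet] = f }
      option.on('-t', '--build-tokyo',  'Build the tokyo wordnet dataset?') { opts[:build_tokyo] = true }
      option.on('-h', '--help',         'Display this screen') { puts option; exit }
    end.parse!

    p opts   # e.g. invoked as: build_wordnet -t -w /path/to/wordnet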
data/examples.rb CHANGED
@@ -1,8 +1,9 @@
  #!/usr/bin/env ruby
+ # coding: utf-8
 
- require 'rubygems'
- require 'words'
- #require 'lib/words.rb'
+ #require 'rubygems'
+ #require 'words'
+ require 'lib/words.rb'
 
  if __FILE__ == $0
 
data/lib/evocations.rb CHANGED
@@ -1,3 +1,5 @@
+ # coding: utf-8
+
  # local includes
  require File.join(File.dirname(__FILE__), 'synset.rb')
 
@@ -7,9 +9,7 @@ module Words
 
  def initialize(evocation_construct, source_synset, wordnet_connection)
 
- @wordnet_connection = wordnet_connection
- @source = source_synset
- @evocation_construct = evocation_construct
+ @evocation_construct, @source, @wordnet_connection = evocation_construct, source_synset, wordnet_connection
 
  end
 
@@ -17,16 +17,12 @@ module Words
 
  @means ||= @evocation_construct["means"].split('|')
 
- @means
-
  end
 
  def medians
 
  @medians ||= @evocation_construct["medians"].split('|')
 
- @medians
-
  end
 
  def size
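
The deletions above (and the matching ones in homographs.rb, relation.rb and synset.rb below) work because the ||= expression itself evaluates to the memoized value, so the trailing bare instance-variable line was redundant. A minimal illustration, not taken from the gem:

    class MemoExample
      def initialize(raw)
        @raw = raw
      end

      def means
        @means ||= @raw.split('|')   # last expression, so this is the return value
      end
    end

    p MemoExample.new('a|b|c').means   # => ["a", "b", "c"]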
data/lib/homographs.rb CHANGED
@@ -1,3 +1,5 @@
+ # coding: utf-8
+
  # local includes
  require File.join(File.dirname(__FILE__), 'synset.rb')
 
@@ -35,32 +37,24 @@ module Words
 
  @tagsense_counts ||= @raw_homographs["tagsense_counts"].split('|').map { |count| { POS_TO_SYMBOL[count[0,1]] => count[1..-1].to_i } }
 
- @tagsense_counts
-
  end
 
  def lemma
 
  @lemma ||= @raw_homographs["lemma"].gsub('_', ' ')
 
- @lemma
-
  end
 
  def available_pos
 
  @available_pos ||= synset_ids.map { |synset_id| POS_TO_SYMBOL[synset_id[0,1]] }.uniq
 
- @available_pos
-
  end
 
  def to_s
 
  @to_s ||= [lemma, " " + available_pos.join("/")].join(",")
 
- @to_s
-
  end
 
  def size(pos = :all)
data/lib/relation.rb CHANGED
@@ -1,91 +1,90 @@
+ # coding: utf-8
+
  # local includes
  require File.join(File.dirname(__FILE__), 'synset.rb')
 
  module Words
 
- class Relation
+ class Relation
 
- RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
- ";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
- "-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
- "%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
- "\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
- SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert
+ RELATION_TO_SYMBOL = { "-c" => :member_of_this_domain_topic, "+" => :derivationally_related_form, "%p" => :part_meronym, "~i" => :instance_hyponym, "@" => :hypernym,
+ ";r" => :domain_of_synset_region, "!" => :antonym, "#p" => :part_holonym, "%s" => :substance_meronym, ";u" => :domain_of_synset_usage,
+ "-r" => :member_of_this_domain_region, "#s" => :substance_holonym, "=" => :attribute, "-u" => :member_of_this_domain_usage, ";c" => :domain_of_synset_topic,
+ "%m"=> :member_meronym, "~" => :hyponym, "@i" => :instance_hypernym, "#m" => :member_holonym, "$" => :verb_group, ">" => :cause, "*" => :entailment,
+ "\\" => :pertainym, "<" => :participle_of_verb, "&" => :similar_to, "^" => :see_also }
+ SYMBOL_TO_RELATION = RELATION_TO_SYMBOL.invert
 
- def initialize(relation_construct, source_synset, wordnet_connection)
+ def initialize(relation_construct, source_synset, wordnet_connection)
 
- @wordnet_connection = wordnet_connection
- @symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
- @dest_synset_id = @pos + @dest_synset_id
- @symbol = RELATION_TO_SYMBOL[@symbol]
- @source_synset = source_synset
+ @wordnet_connection = wordnet_connection
+ @symbol, @dest_synset_id, @pos, @source_dest = relation_construct.split('.')
+ @dest_synset_id = @pos + @dest_synset_id
+ @symbol = RELATION_TO_SYMBOL[@symbol]
+ @source_synset = source_synset
 
- end
+ end
 
- def is_semantic?
+ def is_semantic?
 
- @source_dest == "0000"
+ @source_dest == "0000"
 
- end
+ end
 
- def source_word
+ def source_word
 
- is_semantic? ? @source_word = nil : @source_word = @source_synset.words[@source_dest[0..1].to_i(16)-1] unless defined? @source_word
+ return nil if is_semantic?
+ @source_word ||= @source_synset.words[@source_dest[0..1].to_i(16)-1]
 
- @source_word
+ end
 
- end
+ def destination_word
 
- def destination_word
+ return nil if is_semantic?
+ @destination_word ||= destination.words[@source_dest[2..3].to_i(16)-1]
 
- is_semantic? ? @destination_word = nil : @destination_word = destination.words[@source_dest[2..3].to_i(16)-1] unless defined? @destination_word
+ end
 
- @destination_word
+ def relation_type?(type)
 
- end
+ case
+ when SYMBOL_TO_RELATION.include?(type.to_sym)
+ type.to_sym == @symbol
+ when RELATION_TO_SYMBOL.include?(pos.to_s)
+ POINTER_TO_SYMBOL[type.to_sym] == @symbol
+ else
+ false
+ end
 
- def relation_type?(type)
+ end
 
- case
- when SYMBOL_TO_RELATION.include?(type.to_sym)
- type.to_sym == @symbol
- when RELATION_TO_SYMBOL.include?(pos.to_s)
- POINTER_TO_SYMBOL[type.to_sym] == @symbol
- else
- false
- end
+ def relation_type
 
- end
+ @symbol
 
- def relation_type
+ end
 
- @symbol
+ def destination
 
- end
+ @destination ||= Synset.new(@dest_synset_id, @wordnet_connection, nil)
 
- def destination
+ end
 
- @destination ||= Synset.new(@dest_synset_id, @wordnet_connection, nil)
+ def to_s
 
- @destination
+ if is_semantic?
+ @to_s ||= "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}"
+ else
+ @to_s ||= "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\""
+ end
 
- end
+ end
 
- def to_s
+ def inspect
 
- @to_s = "#{relation_type.to_s.gsub('_', ' ').capitalize} relation between #{@source_synset.synset_id}'s word \"#{source_word}\" and #{@dest_synset_id}'s word \"#{destination_word}\"" if !is_semantic? && !defined?(@to_s)
- @to_s = "Semantic #{relation_type.to_s.gsub('_', ' ')} relation between #{@source_synset.synset_id} and #{@dest_synset_id}" if is_semantic? && !defined?(@to_s)
-
- @to_s
-
- end
-
- def inspect
-
- { :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
+ { :symbol => @symbol, :dest_synset_id => @dest_synset_id, :pos => @pos, :source_dest => @source_dest }.inspect
 
- end
+ end
 
- end
+ end
 
  end
data/lib/synset.rb CHANGED
@@ -1,199 +1,201 @@
+ # coding: utf-8
+
  # local includes
  require File.join(File.dirname(__FILE__), 'relation.rb')
  require File.join(File.dirname(__FILE__), 'evocations.rb')
 
  module Words
 
- class Synset
-
- SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
- SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
- NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
- { :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
- { :lex => :adv_all, :description => "all adverbs" },
- { :lex => :noun_Tops, :description => "unique beginner for nouns" },
- { :lex => :noun_act, :description => "nouns denoting acts or actions" },
- { :lex => :noun_animal, :description => "nouns denoting animals" },
- { :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
- { :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
- { :lex => :noun_body, :description => "nouns denoting body parts" },
- { :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
- { :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
- { :lex => :noun_event, :description => "nouns denoting natural events" },
- { :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
- { :lex => :noun_food, :description => "nouns denoting foods and drinks" },
- { :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
- { :lex => :noun_location, :description => "nouns denoting spatial position" },
- { :lex => :noun_motive, :description => "nouns denoting goals" },
- { :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
- { :lex => :noun_person, :description => "nouns denoting people" },
- { :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
- { :lex => :noun_plant, :description => "nouns denoting plants" },
- { :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
- { :lex => :noun_process, :description => "nouns denoting natural processes" },
- { :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
- { :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
- { :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
- { :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
- { :lex => :noun_substance, :description => "nouns denoting substances" },
- { :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
- { :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
- { :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
- { :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
- { :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
- { :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
- { :lex => :verb_consumption, :description => "verbs of eating and drinking" },
- { :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
- { :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
- { :lex => :verb_emotion, :description => "verbs of feeling" },
- { :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
- { :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
- { :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
- { :lex => :verb_social, :description => "verbs of political and social activities and events" },
- { :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
- { :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
- { :lex => :adj_ppl, :description => "participial adjectives" } ]
-
- def initialize(synset_id, wordnet_connection, homographs)
-
- @wordnet_connection = wordnet_connection
- @synset_hash = wordnet_connection.synset(synset_id)
- @homographs = homographs
-
- # construct some conveniance menthods for relation type access
- Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
- self.class.send(:define_method, "#{relation_type}s?") do
- relations(relation_type).size > 0
- end
- self.class.send(:define_method, "#{relation_type}s") do
- relations(relation_type)
- end
- end
+ class Synset
+
+ SYNSET_TYPE_TO_SYMBOL = {"n" => :noun, "v" => :verb, "a" => :adjective, "r" => :adverb, "s" => :adjective_satallite }
+ SYNSET_TYPE_TO_NUMBER = { "n" => 1, "v" => 2, "a" => 3, "r" => 4, "s" => 5 }
+ NUM_TO_LEX = [ { :lex => :adj_all, :description => "all adjective clusters" },
+ { :lex => :adj_pert, :description => "relational adjectives (pertainyms)" },
+ { :lex => :adv_all, :description => "all adverbs" },
+ { :lex => :noun_Tops, :description => "unique beginner for nouns" },
+ { :lex => :noun_act, :description => "nouns denoting acts or actions" },
+ { :lex => :noun_animal, :description => "nouns denoting animals" },
+ { :lex => :noun_artifact, :description => "nouns denoting man-made objects" },
+ { :lex => :noun_attribute, :description => "nouns denoting attributes of people and objects" },
+ { :lex => :noun_body, :description => "nouns denoting body parts" },
+ { :lex => :noun_cognition, :description => "nouns denoting cognitive processes and contents" },
+ { :lex => :noun_communication, :description => "nouns denoting communicative processes and contents" },
+ { :lex => :noun_event, :description => "nouns denoting natural events" },
+ { :lex => :noun_feeling, :description => "nouns denoting feelings and emotions" },
+ { :lex => :noun_food, :description => "nouns denoting foods and drinks" },
+ { :lex => :noun_group, :description => "nouns denoting groupings of people or objects" },
+ { :lex => :noun_location, :description => "nouns denoting spatial position" },
+ { :lex => :noun_motive, :description => "nouns denoting goals" },
+ { :lex => :noun_object, :description => "nouns denoting natural objects (not man-made)" },
+ { :lex => :noun_person, :description => "nouns denoting people" },
+ { :lex => :noun_phenomenon, :description => "nouns denoting natural phenomena" },
+ { :lex => :noun_plant, :description => "nouns denoting plants" },
+ { :lex => :noun_possession, :description => "nouns denoting possession and transfer of possession" },
+ { :lex => :noun_process, :description => "nouns denoting natural processes" },
+ { :lex => :noun_quantity, :description => "nouns denoting quantities and units of measure" },
+ { :lex => :noun_relation, :description => "nouns denoting relations between people or things or ideas" },
+ { :lex => :noun_shape, :description => "nouns denoting two and three dimensional shapes" },
+ { :lex => :noun_state, :description => "nouns denoting stable states of affairs" },
+ { :lex => :noun_substance, :description => "nouns denoting substances" },
+ { :lex => :noun_time, :description => "nouns denoting time and temporal relations" },
+ { :lex => :verb_body, :description => "verbs of grooming, dressing and bodily care" },
+ { :lex => :verb_change, :description => "verbs of size, temperature change, intensifying, etc." },
+ { :lex => :verb_cognition, :description => "verbs of thinking, judging, analyzing, doubting" },
+ { :lex => :verb_communication, :description => "verbs of telling, asking, ordering, singing" },
+ { :lex => :verb_competition, :description => "verbs of fighting, athletic activities" },
+ { :lex => :verb_consumption, :description => "verbs of eating and drinking" },
+ { :lex => :verb_contact, :description => "verbs of touching, hitting, tying, digging" },
+ { :lex => :verb_creation, :description => "verbs of sewing, baking, painting, performing" },
+ { :lex => :verb_emotion, :description => "verbs of feeling" },
+ { :lex => :verb_motion, :description => "verbs of walking, flying, swimming" },
+ { :lex => :verb_perception, :description => "verbs of seeing, hearing, feeling" },
+ { :lex => :verb_possession, :description => "verbs of buying, selling, owning" },
+ { :lex => :verb_social, :description => "verbs of political and social activities and events" },
+ { :lex => :verb_stative, :description => "verbs of being, having, spatial relations" },
+ { :lex => :verb_weather, :description => "verbs of raining, snowing, thawing, thundering" },
+ { :lex => :adj_ppl, :description => "participial adjectives" } ]
 
- end
+ def initialize(synset_id, wordnet_connection, homographs)
 
- def synset_type
+ @wordnet_connection = wordnet_connection
+ @synset_hash = wordnet_connection.synset(synset_id)
+ @homographs = homographs
 
- SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
+ # construct some conveniance menthods for relation type access
+ Relation::SYMBOL_TO_RELATION.keys.each do |relation_type|
+ self.class.send(:define_method, "#{relation_type}s?") do
+ relations(relation_type).size > 0
+ end
+ self.class.send(:define_method, "#{relation_type}s") do
+ relations(relation_type)
+ end
+ end
 
- end
+ end
 
- def words
+ def synset_type
 
- @words ||= words_with_lexical_ids.map { |word_with_num| word_with_num[:word] }
+ SYNSET_TYPE_TO_SYMBOL[@synset_hash["synset_type"]]
 
- @words
+ end
 
- end
+ def words
 
- def lexical_ids
+ @words ||= map_from_words_with_lexical_ids(:word)
 
- @words ||= words_with_lexical_ids.map { |word_with_num| word_with_num[:lexical_id] }
+ end
 
- @words
+ def lexical_ids
 
- end
+ @words ||= map_from_words_with_lexical_ids(:lexical_id)
 
- def size
+ end
 
- words.size
+ def size
 
- end
+ words.size
 
- def words_with_lexical_ids
+ end
 
- @words_with_num ||= @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } }
+ def words_with_lexical_ids
 
- @words_with_num
+ @words_with_num ||= @synset_hash["words"].split('|').map { |word| word_parts = word.split('.'); { :word => word_parts[0].gsub('_', ' '), :lexical_id => word_parts[1] } }
 
- end
+ end
 
- def lexical_filenum
+ def lexical_filenum
 
- @synset_hash["lexical_filenum"]
+ @synset_hash["lexical_filenum"]
 
- end
+ end
 
- def lexical_catagory
+ def lexical_catagory
 
- lexical[:lex]
+ lexical[:lex]
 
- end
+ end
 
- def lexical_description
+ def lexical_description
 
- lexical[:description]
+ lexical[:description]
 
- end
+ end
 
- def lexical
+ def lexical
 
- NUM_TO_LEX[lexical_filenum.to_i]
+ NUM_TO_LEX[lexical_filenum.to_i]
 
- end
+ end
 
- def synset_id
+ def synset_id
 
- @synset_hash["synset_id"]
+ @synset_hash["synset_id"]
 
- end
+ end
 
- def gloss
+ def gloss
 
- @synset_hash["gloss"]
+ @synset_hash["gloss"]
 
- end
+ end
 
- def lemma
+ def lemma
 
- @homographs.lemma
+ @homographs.lemma
 
- end
+ end
 
- def homographs
+ def homographs
 
- @homographs
+ @homographs
 
- end
+ end
 
- def inspect
+ def inspect
 
- @synset_hash.inspect
+ @synset_hash.inspect
 
- end
+ end
 
- def relations(type = :all)
+ def relations(type = :all)
 
- @relations ||= @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) }
+ @relations ||= @synset_hash["relations"].split('|').map { |relation| Relation.new(relation, self, @wordnet_connection) }
 
- case
- when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
- @relations.select { |relation| relation.relation_type == type.to_sym }
- when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
- @relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
- else
- @relations
- end
+ case
+ when Relation::SYMBOL_TO_RELATION.include?(type.to_sym)
+ @relations.select { |relation| relation.relation_type == type.to_sym }
+ when Relation::RELATION_TO_SYMBOL.include?(type.to_s)
+ @relations.select { |relation| relation.relation_type == Relation::RELATION_TO_SYMBOL[type.to_s] }
+ else
+ @relations
+ end
 
- end
+ end
 
- def evocations
+ def evocations
 
- evocations_arr = @wordnet_connection.evocations(synset_id)
- Evocations.new(evocations_arr, self, @wordnet_connection) unless evocations_arr.nil?
+ evocations_arr = @wordnet_connection.evocations(synset_id)
+ Evocations.new(evocations_arr, self, @wordnet_connection) unless evocations_arr.nil?
 
- end
+ end
 
- def to_s
+ def to_s
 
- @to_s ||= "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}"
+ @to_s ||= "#{synset_type.to_s.capitalize} including word(s): #{words.map { |word| '"' + word + '"' }.join(', ')} meaning: #{gloss}"
 
- @to_s
+ end
 
- end
+ alias word lemma
+
+ private
 
- alias word lemma
+ def map_from_words_with_lexical_ids(value)
 
- end
+ words_with_lexical_ids.map { |word_with_num| word_with_num[value] }
+
+ end
+
+ end
 
  end