words 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.markdown CHANGED
@@ -56,8 +56,12 @@ Then your ready to rock and roll. :)
56
56
  To build the wordnet dataset (or index for pure) file yourself, from the original wordnet files, you can use the bundled "build_wordnet" command
57
57
 
58
58
  build_wordnet -h # this will give you the usage information
59
- sudo build_wordnet -v --build-tokyo # this would attempt to build the tokyo backend data locating the original wordnet files through a search...
60
- sudo build_wordnet -v --build-pure # this would attempt to build the pure backend index locating the original wordnet files through a search...
59
+
60
+ # this would attempt to build the tokyo backend data locating the original wordnet files through a search...
61
+ sudo build_wordnet -v --build-tokyo
62
+
63
+ # this would attempt to build the pure backend index locating the original wordnet files through a search...
64
+ sudo build_wordnet -v --build-pure
61
65
 
62
66
  ## Usage ##
63
67
 
data/Rakefile CHANGED
@@ -5,8 +5,8 @@ begin
5
5
  require 'jeweler'
6
6
  Jeweler::Tasks.new do |gem|
7
7
  gem.name = "words"
8
- gem.summary = %Q{A fast, easy to use interface to WordNet® with cross ruby distribution compatability.}
9
- gem.description = %Q{A fast, easy to use interface to WordNet® with cross ruby distribution compatability. We use TokyoCabinet to store the dataset and the excellent rufus-tokyo to interface with it. This allows us to have full compatability across ruby distributions while still remaining both fast and simple to use.}
8
+ gem.summary = %Q{A Fast & Easy to use interface to WordNet® with cross ruby distribution compatibility.}
9
+ gem.description = %Q{Words, with both pure ruby & tokyo-cabinet backends, implements a fast interface to WordNet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatibility and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful API and installation is a cinch!}
10
10
  gem.email = "roja@arbia.co.uk"
11
11
  gem.homepage = "http://github.com/roja/words"
12
12
  gem.authors = ["Roja Buck"]
@@ -14,8 +14,12 @@ begin
14
14
  gem.add_dependency 'rufus-tokyo', '>= 1.0.5'
15
15
  gem.executables = [ "build_wordnet" ]
16
16
  gem.default_executable = "build_wordnet"
17
+ gem.rubyforge_project = 'words'
17
18
  end
18
19
  Jeweler::GemcutterTasks.new
20
+ Jeweler::RubyforgeTasks.new do |rubyforge|
21
+ rubyforge.doc_task = "rdoc"
22
+ end
19
23
  rescue LoadError
20
24
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
21
25
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.2.1
data/bin/build_wordnet CHANGED
@@ -19,102 +19,102 @@ def locate_wordnet(base_dir)
19
19
  return path + "dict" if (path + "dict/data.noun").exist?
20
20
  end
21
21
 
22
- if __FILE__ == $0
23
-
24
- puts "Words Dataset Constructor 2010 (c) Roja Buck"
25
-
26
- opts = Trollop::options do
27
- opt :verbose, "Output verbose program detail.", :default => false
28
- opt :wordnet, "Location of the wordnet dictionary directory", :default => "Search..."
29
- opt :build_tokyo, "Build the tokyo dataset?", :default => false
30
- opt :build_pure, "Build the pure ruby dataset?", :default => false
31
- end
32
- Trollop::die :build_tokyo, "Either tokyo dataset or pure ruby dataset are required" if !opts[:build_tokyo] && !opts[:build_pure]
33
- puts "Verbose mode enabled" if (VERBOSE = opts[:verbose])
34
-
35
- wordnet_dir = nil
36
- if opts[:wordnet] == "Search..."
37
- ['/usr/share/wordnet', '/usr/local/share/wordnet', '/usr/local/WordNet-3.0'].each do |potential_dir|
38
- break unless (wordnet_dir = locate_wordnet potential_dir).nil?
39
- end
40
- abort( "Unable to locate wordnet dictionary. To specify check --help." ) if wordnet_dir.nil?
41
- else
42
- wordnet_dir = locate_wordnet opts[:wordnet]
43
- abort( "Unable to locate wordnet dictionary in directory #{opts[:wordnet]}. Please check and try again." ) if wordnet_dir.nil?
22
+ puts "Words Dataset Constructor 2010 (c) Roja Buck"
23
+
24
+ opts = Trollop::options do
25
+ opt :verbose, "Output verbose program detail.", :default => false
26
+ opt :wordnet, "Location of the wordnet dictionary directory", :default => "Search..."
27
+ opt :build_tokyo, "Build the tokyo dataset?", :default => false
28
+ opt :build_pure, "Build the pure ruby dataset?", :default => false
29
+ end
30
+ Trollop::die :build_tokyo, "You need to specify whether tokyo dataset or pure ruby index building is required" if !opts[:build_tokyo] && !opts[:build_pure]
31
+ puts "Verbose mode enabled" if (VERBOSE = opts[:verbose])
32
+
33
+ gem_path = Pathname.new "#{File.dirname(__FILE__)}/.."
34
+ abort "Ensure you run the command using sudo or as a Superuser / Administrator" unless gem_path.writable?
35
+ data_path = gem_path + "data/"
36
+ data_path.mkpath
37
+
38
+ wordnet_dir = nil
39
+ if opts[:wordnet] == "Search..."
40
+ ['/usr/share/wordnet', '/usr/local/share/wordnet', '/usr/local/WordNet-3.0'].each do |potential_dir|
41
+ break unless (wordnet_dir = locate_wordnet potential_dir).nil?
44
42
  end
43
+ abort( "Unable to locate wordnet dictionary. To specify check --help." ) if wordnet_dir.nil?
44
+ else
45
+ wordnet_dir = locate_wordnet opts[:wordnet]
46
+ abort( "Unable to locate wordnet dictionary in directory #{opts[:wordnet]}. Please check and try again." ) if wordnet_dir.nil?
47
+ end
48
+
49
+ # At this point we know we should have a wordnet directory within wordnet_dir
50
+ puts "Found wordnet files in #{wordnet_dir}..." if VERBOSE
51
+
52
+ index_files = POS_FILE_TYPES.map { |pos| wordnet_dir + "index.#{pos}" }
53
+ data_files = POS_FILE_TYPES.map { |pos| wordnet_dir + "data.#{pos}" }
54
+
55
+ (index_files + data_files).each do |required_file|
56
+ abort( "Unable to locate #{required_file} within the wordnet dictionary. Please check your wordnet copy is valid and try again." ) unless required_file.exist?
57
+ abort( "Cannot get readable permissions to #{required_file} within the wordnet dictionary. Please check the file permissions and try again." ) unless required_file.readable?
58
+ end
59
+
60
+ # At this point we know we have the correct files, though we don't know their validity
61
+ puts "Validated existence of wordnet files in #{wordnet_dir}..." if VERBOSE
62
+
63
+ # Build data
64
+
65
+ index_hash = Hash.new
66
+ data_hash = Hash.new
67
+ POS_FILE_TYPES.each do |file_pos|
45
68
 
46
- # At this point we know we should have a wordnet directory within wordnet_dir
47
- puts "Found wordnet files in #{wordnet_dir}..." if VERBOSE
48
-
49
- index_files = POS_FILE_TYPES.map { |pos| wordnet_dir + "index.#{pos}" }
50
- data_files = POS_FILE_TYPES.map { |pos| wordnet_dir + "data.#{pos}" }
69
+ puts "Building #{file_pos} indexes..." if VERBOSE
51
70
 
52
- (index_files + data_files).each do |required_file|
53
- abort( "Unable to locate #{required_file} within the wordnet dictionary. Please check your wordnet copy is valid and try again." ) unless required_file.exist?
54
- abort( "Cannot get readable permissions to #{required_file} within the wordnet dictionary. Please check the file permissions and try again." ) unless required_file.readable?
71
+ # add indexes
72
+ (wordnet_dir + "index.#{file_pos}").each_line do |index_line|
73
+ next if index_line[0, 2] == " "
74
+ index_parts = index_line.split(" ")
75
+
76
+ lemma, pos, synset_count, pointer_count = index_parts.shift, index_parts.shift, index_parts.shift.to_i, index_parts.shift.to_i
77
+ pointer_symbols = Array.new(pointer_count).map { POS_FILE_TYPE_TO_SHORT[file_pos] + index_parts.shift }
78
+ sense_count = index_parts.shift
79
+ tagsense_count = pos + index_parts.shift
80
+ synset_ids = Array.new(synset_count).map { POS_FILE_TYPE_TO_SHORT[file_pos] + index_parts.shift }
81
+
82
+ index_hash[lemma] = { "synset_ids" => [], "tagsense_counts" => [] } if index_hash[lemma].nil?
83
+ index_hash[lemma] = { "lemma" => lemma, "synset_ids" => index_hash[lemma]["synset_ids"] + synset_ids, "tagsense_counts" => index_hash[lemma]["tagsense_counts"] + [tagsense_count] }
84
+
55
85
  end
56
86
 
57
- # At this point we know we have the correct files, though we don't know there validity
58
- puts "Validated existance of wordnet files in #{wordnet_dir}..." if VERBOSE
59
-
60
- # Build data
61
-
62
- index_hash = Hash.new
63
- data_hash = Hash.new
64
- POS_FILE_TYPES.each do |file_pos|
65
-
66
- puts "Building #{file_pos} indexes..." if VERBOSE
87
+ if opts[:build_tokyo]
88
+ puts "Building #{file_pos} data..." if VERBOSE
67
89
 
68
- # add indexes
69
- (wordnet_dir + "index.#{file_pos}").each_line do |index_line|
70
- next if index_line[0, 2] == " "
71
- index_parts = index_line.split(" ")
72
-
73
- lemma, pos, synset_count, pointer_count = index_parts.shift, index_parts.shift, index_parts.shift.to_i, index_parts.shift.to_i
74
- pointer_symbols = Array.new(pointer_count).map { POS_FILE_TYPE_TO_SHORT[file_pos] + index_parts.shift }
75
- sense_count = index_parts.shift
76
- tagsense_count = pos + index_parts.shift
77
- synset_ids = Array.new(synset_count).map { POS_FILE_TYPE_TO_SHORT[file_pos] + index_parts.shift }
90
+ # add data
91
+ (wordnet_dir + "data.#{file_pos}").each_line do |data_line|
92
+ next if data_line[0, 2] == " "
93
+ data_line, gloss = data_line.split(" | ")
94
+ data_parts = data_line.split(" ")
78
95
 
79
- index_hash[lemma] = { "synset_ids" => [], "tagsense_counts" => [] } if index_hash[lemma].nil?
80
- index_hash[lemma] = { "lemma" => lemma, "synset_ids" => index_hash[lemma]["synset_ids"] + synset_ids, "tagsense_counts" => index_hash[lemma]["tagsense_counts"] + [tagsense_count] }
96
+ synset_id, lexical_filenum, synset_type, word_count = POS_FILE_TYPE_TO_SHORT[file_pos] + data_parts.shift, data_parts.shift, data_parts.shift, data_parts.shift.to_i(16)
97
+ words = Array.new(word_count).map { "#{data_parts.shift}.#{data_parts.shift}" }
98
+ relations = Array.new(data_parts.shift.to_i).map { "#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}" }
81
99
 
82
- end
83
-
84
- if opts[:build_tokyo]
85
- puts "Building #{file_pos} data..." if VERBOSE
86
-
87
- # add data
88
- (wordnet_dir + "data.#{file_pos}").each_line do |data_line|
89
- next if data_line[0, 2] == " "
90
- data_line, gloss = data_line.split(" | ")
91
- data_parts = data_line.split(" ")
92
-
93
- synset_id, lexical_filenum, synset_type, word_count = POS_FILE_TYPE_TO_SHORT[file_pos] + data_parts.shift, data_parts.shift, data_parts.shift, data_parts.shift.to_i(16)
94
- words = Array.new(word_count).map { "#{data_parts.shift}.#{data_parts.shift}" }
95
- relations = Array.new(data_parts.shift.to_i).map { "#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}.#{data_parts.shift}" }
96
-
97
- data_hash[synset_id] = { "synset_id" => synset_id, "lexical_filenum" => lexical_filenum, "synset_type" => synset_type,
100
+ data_hash[synset_id] = { "synset_id" => synset_id, "lexical_filenum" => lexical_filenum, "synset_type" => synset_type,
98
101
  "words" => words.join('|'), "relations" => relations.join('|'), "gloss" => gloss.strip }
99
- end
100
102
  end
101
-
102
- end
103
-
104
- if opts[:build_tokyo]
105
- tokyo_hash = Rufus::Tokyo::Table.new("#{File.dirname(__FILE__)}/../data/wordnet.tct")
106
- index_hash.each { |k,v| tokyo_hash[k] = { "lemma" => v["lemma"], "synset_ids" => v["synset_ids"].join('|'), "tagsense_counts" => v["tagsense_counts"].join('|') } }
107
- data_hash.each { |k,v| tokyo_hash[k] = v }
108
- tokyo_hash.close
109
103
  end
110
104
 
111
- if opts[:build_pure]
112
- index = Hash.new
113
- index_hash.each { |k,v| index[k] = [v["lemma"], v["tagsense_counts"].join('|'), v["synset_ids"].join('|')] }
114
- File.open("#{File.dirname(__FILE__)}/../data/index.dmp",'w') do |file|
115
- file.write Marshal.dump(index)
116
- end
105
+ end
106
+
107
+ if opts[:build_tokyo]
108
+ tokyo_hash = Rufus::Tokyo::Table.new((data_path + "wordnet.tct").to_s)
109
+ index_hash.each { |k,v| tokyo_hash[k] = { "lemma" => v["lemma"], "synset_ids" => v["synset_ids"].join('|'), "tagsense_counts" => v["tagsense_counts"].join('|') } }
110
+ data_hash.each { |k,v| tokyo_hash[k] = v }
111
+ tokyo_hash.close
112
+ end
113
+
114
+ if opts[:build_pure]
115
+ index = Hash.new
116
+ index_hash.each { |k,v| index[k] = [v["lemma"], v["tagsense_counts"].join('|'), v["synset_ids"].join('|')] }
117
+ File.open(data_path + "index.dmp",'w') do |file|
118
+ file.write Marshal.dump(index)
117
119
  end
118
-
119
-
120
- end
120
+ end
data/examples.rb CHANGED
@@ -1,10 +1,11 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'lib/words'
3
+ require 'rubygems'
4
+ require 'words'
4
5
 
5
6
  if __FILE__ == $0
6
7
 
7
- wordnet = Words::Words.new # :pure
8
+ wordnet = Words::Words.new :pure
8
9
 
9
10
  puts wordnet
10
11
 
data/lib/words.rb CHANGED
@@ -21,7 +21,7 @@ module Words
21
21
 
22
22
  if @data_path.exist?
23
23
  if @connection_type == :tokyo
24
- @connection = Rufus::Tokyo::Table.new(@data_path.to_s)
24
+ @connection = Rufus::Tokyo::Table.new(@data_path.to_s, :mode => 'r')
25
25
  @connected = true
26
26
  elsif @connection_type == :pure
27
27
  # open the index is there
data/words.gemspec CHANGED
@@ -5,13 +5,13 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{words}
8
- s.version = "0.2.0"
8
+ s.version = "0.2.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Roja Buck"]
12
12
  s.date = %q{2010-01-16}
13
13
  s.default_executable = %q{build_wordnet}
14
- s.description = %q{A fast, easy to use interface to WordNet® with cross ruby distribution compatability. We use TokyoCabinet to store the dataset and the excellent rufus-tokyo to interface with it. This allows us to have full compatability across ruby distributions while still remaining both fast and simple to use.}
14
+ s.description = %q{Words, with both pure ruby & tokyo-cabinet backends, implements a fast interface to WordNet® over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatibility and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet®. I have attempted to provide ease of use in the form of a simple yet powerful API and installation is a cinch!}
15
15
  s.email = %q{roja@arbia.co.uk}
16
16
  s.executables = ["build_wordnet"]
17
17
  s.extra_rdoc_files = [
@@ -34,8 +34,9 @@ Gem::Specification.new do |s|
34
34
  s.homepage = %q{http://github.com/roja/words}
35
35
  s.rdoc_options = ["--charset=UTF-8"]
36
36
  s.require_paths = ["lib"]
37
+ s.rubyforge_project = %q{words}
37
38
  s.rubygems_version = %q{1.3.5}
38
- s.summary = %q{A fast, easy to use interface to WordNet® with cross ruby distribution compatability.}
39
+ s.summary = %q{A Fast & Easy to use interface to WordNet® with cross ruby distribution compatibility.}
39
40
  s.test_files = [
40
41
  "test/test_words.rb",
41
42
  "test/helper.rb"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: words
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roja Buck
@@ -32,7 +32,7 @@ dependencies:
32
32
  - !ruby/object:Gem::Version
33
33
  version: 1.0.5
34
34
  version:
35
- description: "A fast, easy to use interface to WordNet\xC2\xAE with cross ruby distribution compatability. We use TokyoCabinet to store the dataset and the excellent rufus-tokyo to interface with it. This allows us to have full compatability across ruby distributions while still remaining both fast and simple to use."
35
+ description: "Words, with both pure ruby & tokyo-cabinet backends, implements a fast interface to WordNet\xC2\xAE over the same easy-to-use API. The FFI backend makes use of Tokyo Cabinet and the FFI interface, rufus-tokyo, to provide cross ruby distribution compatibility and blistering speed. The pure ruby interface operates on a special ruby optimised index along with the basic dictionary files provided by WordNet\xC2\xAE. I have attempted to provide ease of use in the form of a simple yet powerful API and installation is a cinch!"
36
36
  email: roja@arbia.co.uk
37
37
  executables:
38
38
  - build_wordnet
@@ -76,11 +76,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
76
76
  version:
77
77
  requirements: []
78
78
 
79
- rubyforge_project:
79
+ rubyforge_project: words
80
80
  rubygems_version: 1.3.5
81
81
  signing_key:
82
82
  specification_version: 3
83
- summary: "A fast, easy to use interface to WordNet\xC2\xAE with cross ruby distribution compatability."
83
+ summary: "A Fast & Easy to use interface to WordNet\xC2\xAE with cross ruby distribution compatibility."
84
84
  test_files:
85
85
  - test/test_words.rb
86
86
  - test/helper.rb