lingua 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/lingua/en/syllable.rb +21 -37
- metadata +4 -18
- data/lib/lingua/en/syllable/dict +0 -0
- data/lib/lingua/en/syllable/dictionary.rb +0 -107
data/Rakefile
CHANGED
@@ -10,7 +10,6 @@ begin
|
|
10
10
|
gem.email = "dbalatero@gmail.com"
|
11
11
|
gem.homepage = "http://github.com/dbalatero/lingua"
|
12
12
|
gem.authors = ["David Balatero"]
|
13
|
-
gem.add_dependency 'gdbm'
|
14
13
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
15
14
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
15
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.1
|
1
|
+
0.5.2
|
data/lib/lingua/en/syllable.rb
CHANGED
@@ -1,39 +1,23 @@
|
|
1
|
+
require 'lingua/en/syllable/guess'
|
2
|
+
|
1
3
|
module Lingua
|
2
|
-
module EN
|
3
|
-
# The module Lingua::EN::Syllable contains a single class method, +syllable+,
|
4
|
-
# which will use the most accurate technique available to determine the number
|
5
|
-
# syllables in a string containing a word passed to it.
|
6
|
-
# The exact definition of the function depends on the availability of the
|
7
|
-
# Carnegie Mellon Pronouncing Dictionary on the system. If it is available,
|
8
|
-
# the number of syllables as determined by the dictionary will be returned. If
|
9
|
-
# the dictionary is not available, or if a word not contained in the dictionary
|
10
|
-
# is passed, it will return the number of syllables as determined by the
|
11
|
-
# module Lingua::EN::Syllable::Guess. For more details, see there and
|
12
|
-
# Lingua::EN::Syllable::Dictionary.
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
return Dictionary::syllables(word)
|
22
|
-
rescue Dictionary::LookUpError
|
23
|
-
return Guess::syllables(word)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
rescue LoadError # dictionary not available?
|
27
|
-
require 'lingua/en/syllable/guess.rb'
|
28
|
-
def Syllable.syllables(word)
|
29
|
-
Guess::syllables word
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
4
|
+
module EN
|
5
|
+
# The module Lingua::EN::Syllable contains a single class method, +syllable+,
|
6
|
+
# which will use the most accurate technique available to determine the number
|
7
|
+
# syllables in a string containing a word passed to it.
|
8
|
+
# The exact definition of the function depends on the availability of the
|
9
|
+
# Carnegie Mellon Pronouncing Dictionary on the system. If it is available,
|
10
|
+
# the number of syllables as determined by the dictionary will be returned. If
|
11
|
+
# the dictionary is not available, or if a word not contained in the dictionary
|
12
|
+
# is passed, it will return the number of syllables as determined by the
|
13
|
+
# module Lingua::EN::Syllable::Guess. For more details, see there and
|
14
|
+
# Lingua::EN::Syllable::Dictionary.
|
15
|
+
#
|
16
|
+
# dbalatero: removed dictionary.
|
17
|
+
module Syllable
|
18
|
+
def self.syllables(word)
|
19
|
+
Guess::syllables word
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
33
23
|
end
|
34
|
-
end
|
35
|
-
|
36
|
-
if __FILE__ == $0
|
37
|
-
ARGV.each { | word | puts "'#{word}' : " +
|
38
|
-
Lingua::EN::Syllable::syllables(word).to_s }
|
39
|
-
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 5
|
8
|
-
- 1
|
9
|
-
version: 0.5.1
|
8
|
+
- 2
|
9
|
+
version: 0.5.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- David Balatero
|
@@ -17,22 +17,10 @@ cert_chain: []
|
|
17
17
|
date: 2010-04-11 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
name: gdbm
|
22
|
-
prerelease: false
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
-
requirements:
|
25
|
-
- - ">="
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
segments:
|
28
|
-
- 0
|
29
|
-
version: "0"
|
30
|
-
type: :runtime
|
31
|
-
version_requirements: *id001
|
32
20
|
- !ruby/object:Gem::Dependency
|
33
21
|
name: rspec
|
34
22
|
prerelease: false
|
35
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
36
24
|
requirements:
|
37
25
|
- - ">="
|
38
26
|
- !ruby/object:Gem::Version
|
@@ -42,7 +30,7 @@ dependencies:
|
|
42
30
|
- 9
|
43
31
|
version: 1.2.9
|
44
32
|
type: :development
|
45
|
-
version_requirements: *id002
|
33
|
+
version_requirements: *id001
|
46
34
|
description: Provides sentence splitting, syllable, and text-quality algorithms.
|
47
35
|
email: dbalatero@gmail.com
|
48
36
|
executables: []
|
@@ -63,8 +51,6 @@ files:
|
|
63
51
|
- lib/lingua/en/readability.rb
|
64
52
|
- lib/lingua/en/sentence.rb
|
65
53
|
- lib/lingua/en/syllable.rb
|
66
|
-
- lib/lingua/en/syllable/dict
|
67
|
-
- lib/lingua/en/syllable/dictionary.rb
|
68
54
|
- lib/lingua/en/syllable/guess.rb
|
69
55
|
- spec/spec.opts
|
70
56
|
- spec/spec_helper.rb
|
data/lib/lingua/en/syllable/dict
DELETED
Binary file
|
@@ -1,107 +0,0 @@
|
|
1
|
-
module Lingua
|
2
|
-
module EN
|
3
|
-
module Syllable
|
4
|
-
|
5
|
-
module Dictionary
|
6
|
-
class LookUpError < IndexError
|
7
|
-
end
|
8
|
-
|
9
|
-
@@dictionary = nil
|
10
|
-
@@dbmclass = nil
|
11
|
-
@@dbmext = nil
|
12
|
-
|
13
|
-
# use an available dbm-style hash
|
14
|
-
[ 'gdbm', 'dbm'].each do | dbm |
|
15
|
-
begin
|
16
|
-
require dbm
|
17
|
-
@@dbmclass = Module.const_get(dbm.upcase)
|
18
|
-
rescue
|
19
|
-
next
|
20
|
-
end
|
21
|
-
break
|
22
|
-
end
|
23
|
-
|
24
|
-
if @@dbmclass.nil?
|
25
|
-
raise LoadError,
|
26
|
-
"no dbm class available for Lingua::EN::Syllable::Dictionary"
|
27
|
-
end
|
28
|
-
|
29
|
-
# Look up word in the dbm dictionary.
|
30
|
-
def Dictionary.syllables(word)
|
31
|
-
if @@dictionary.nil?
|
32
|
-
load_dictionary
|
33
|
-
end
|
34
|
-
word = word.upcase
|
35
|
-
begin
|
36
|
-
pronounce = @@dictionary.fetch(word)
|
37
|
-
rescue IndexError
|
38
|
-
if word =~ /'/
|
39
|
-
word = word.delete "'"
|
40
|
-
retry
|
41
|
-
end
|
42
|
-
raise LookUpError, "word #{word} not in dictionary"
|
43
|
-
end
|
44
|
-
|
45
|
-
pronounce.split(/-/).grep(/^[AEIUO]/).length
|
46
|
-
end
|
47
|
-
|
48
|
-
def Dictionary.dictionary
|
49
|
-
if @@dictionary.nil?
|
50
|
-
load_dictionary
|
51
|
-
end
|
52
|
-
@@dictionary
|
53
|
-
end
|
54
|
-
|
55
|
-
# convert a text file dictionary into dbm files. Returns the file names
|
56
|
-
# of the created dbms.
|
57
|
-
def Dictionary.make_dictionary(source_file, output_dir)
|
58
|
-
begin
|
59
|
-
Dir.mkdir(output_dir)
|
60
|
-
rescue
|
61
|
-
end
|
62
|
-
|
63
|
-
# clean old dictionary dbms
|
64
|
-
Dir.foreach(output_dir) do | x |
|
65
|
-
next if x =~ /^\.\.?$/
|
66
|
-
File.unlink(File.join(output_dir, x))
|
67
|
-
end
|
68
|
-
|
69
|
-
dbm = @@dbmclass.new(File.join(output_dir, 'dict'))
|
70
|
-
|
71
|
-
begin
|
72
|
-
IO.foreach(source_file) do | line |
|
73
|
-
next if line !~ /^[A-Z]/
|
74
|
-
line.chomp!
|
75
|
-
(word, *phonemes) = line.split(/ ?/)
|
76
|
-
next if word =~ /\(\d\) ?$/ # ignore alternative pronunciations
|
77
|
-
dbm.store(word, phonemes.join("-"))
|
78
|
-
end
|
79
|
-
rescue
|
80
|
-
# close and clean up
|
81
|
-
dbm.close
|
82
|
-
Dir.foreach(output_dir) do | x |
|
83
|
-
next if x =~ /^\.\.?$/
|
84
|
-
File.unlink(File.join('dict', x))
|
85
|
-
end
|
86
|
-
# delete files
|
87
|
-
raise
|
88
|
-
end
|
89
|
-
|
90
|
-
dbm.close
|
91
|
-
|
92
|
-
Dir.entries(output_dir).collect { | x |
|
93
|
-
x =~ /^\.\.?$/ ? nil : File.join("dict", x)
|
94
|
-
}.compact
|
95
|
-
end
|
96
|
-
|
97
|
-
private
|
98
|
-
def Dictionary.load_dictionary
|
99
|
-
@@dictionary = @@dbmclass.new( __FILE__[0..-14] + 'dict')
|
100
|
-
if @@dictionary.keys.length.zero?
|
101
|
-
raise LoadError, "dictionary file not found"
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
107
|
-
end
|