lingua 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/lingua/en/syllable.rb +21 -37
- metadata +4 -18
- data/lib/lingua/en/syllable/dict +0 -0
- data/lib/lingua/en/syllable/dictionary.rb +0 -107
data/Rakefile
CHANGED
@@ -10,7 +10,6 @@ begin
|
|
10
10
|
gem.email = "dbalatero@gmail.com"
|
11
11
|
gem.homepage = "http://github.com/dbalatero/lingua"
|
12
12
|
gem.authors = ["David Balatero"]
|
13
|
-
gem.add_dependency 'gdbm'
|
14
13
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
15
14
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
15
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.5.1
|
1
|
+
0.5.2
|
data/lib/lingua/en/syllable.rb
CHANGED
@@ -1,39 +1,23 @@
|
|
1
|
+
require 'lingua/en/syllable/guess'
|
2
|
+
|
1
3
|
module Lingua
|
2
|
-
module EN
|
3
|
-
# The module Lingua::EN::Syllable contains a single class method, +syllable+,
|
4
|
-
# which will use the most accurate technique available to determine the number
|
5
|
-
# syllables in a string containing a word passed to it.
|
6
|
-
# The exact definition of the function depends on the availability of the
|
7
|
-
# Carnegie Mellon Pronouncing Dictionary on the system. If it is available,
|
8
|
-
# the number of syllables as determined by the dictionary will be returned. If
|
9
|
-
# the dictionary is not available, or if a word not contained in the dictionary
|
10
|
-
# is passed, it will return the number of syllables as determined by the
|
11
|
-
# module Lingua::EN::Syllable::Guess. For more details, see there and
|
12
|
-
# Lingua::EN::Syllable::Dictionary.
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
return Dictionary::syllables(word)
|
22
|
-
rescue Dictionary::LookUpError
|
23
|
-
return Guess::syllables(word)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
rescue LoadError # dictionary not available?
|
27
|
-
require 'lingua/en/syllable/guess.rb'
|
28
|
-
def Syllable.syllables(word)
|
29
|
-
Guess::syllables word
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
4
|
+
module EN
|
5
|
+
# The module Lingua::EN::Syllable contains a single class method, +syllable+,
|
6
|
+
# which will use the most accurate technique available to determine the number
|
7
|
+
# syllables in a string containing a word passed to it.
|
8
|
+
# The exact definition of the function depends on the availability of the
|
9
|
+
# Carnegie Mellon Pronouncing Dictionary on the system. If it is available,
|
10
|
+
# the number of syllables as determined by the dictionary will be returned. If
|
11
|
+
# the dictionary is not available, or if a word not contained in the dictionary
|
12
|
+
# is passed, it will return the number of syllables as determined by the
|
13
|
+
# module Lingua::EN::Syllable::Guess. For more details, see there and
|
14
|
+
# Lingua::EN::Syllable::Dictionary.
|
15
|
+
#
|
16
|
+
# dbalatero: removed dictionary.
|
17
|
+
module Syllable
|
18
|
+
def self.syllables(word)
|
19
|
+
Guess::syllables word
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
33
23
|
end
|
34
|
-
end
|
35
|
-
|
36
|
-
if __FILE__ == $0
|
37
|
-
ARGV.each { | word | puts "'#{word}' : " +
|
38
|
-
Lingua::EN::Syllable::syllables(word).to_s }
|
39
|
-
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 5
|
8
|
-
- 1
|
9
|
-
version: 0.5.1
|
8
|
+
- 2
|
9
|
+
version: 0.5.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- David Balatero
|
@@ -17,22 +17,10 @@ cert_chain: []
|
|
17
17
|
date: 2010-04-11 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
name: gdbm
|
22
|
-
prerelease: false
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
-
requirements:
|
25
|
-
- - ">="
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
segments:
|
28
|
-
- 0
|
29
|
-
version: "0"
|
30
|
-
type: :runtime
|
31
|
-
version_requirements: *id001
|
32
20
|
- !ruby/object:Gem::Dependency
|
33
21
|
name: rspec
|
34
22
|
prerelease: false
|
35
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
36
24
|
requirements:
|
37
25
|
- - ">="
|
38
26
|
- !ruby/object:Gem::Version
|
@@ -42,7 +30,7 @@ dependencies:
|
|
42
30
|
- 9
|
43
31
|
version: 1.2.9
|
44
32
|
type: :development
|
45
|
-
version_requirements: *id002
|
33
|
+
version_requirements: *id001
|
46
34
|
description: Provides sentence splitting, syllable, and text-quality algorithms.
|
47
35
|
email: dbalatero@gmail.com
|
48
36
|
executables: []
|
@@ -63,8 +51,6 @@ files:
|
|
63
51
|
- lib/lingua/en/readability.rb
|
64
52
|
- lib/lingua/en/sentence.rb
|
65
53
|
- lib/lingua/en/syllable.rb
|
66
|
-
- lib/lingua/en/syllable/dict
|
67
|
-
- lib/lingua/en/syllable/dictionary.rb
|
68
54
|
- lib/lingua/en/syllable/guess.rb
|
69
55
|
- spec/spec.opts
|
70
56
|
- spec/spec_helper.rb
|
data/lib/lingua/en/syllable/dict
DELETED
Binary file
|
data/lib/lingua/en/syllable/dictionary.rb
DELETED
@@ -1,107 +0,0 @@
|
|
1
|
-
module Lingua
|
2
|
-
module EN
|
3
|
-
module Syllable
|
4
|
-
|
5
|
-
module Dictionary
|
6
|
-
class LookUpError < IndexError
|
7
|
-
end
|
8
|
-
|
9
|
-
@@dictionary = nil
|
10
|
-
@@dbmclass = nil
|
11
|
-
@@dbmext = nil
|
12
|
-
|
13
|
-
# use an available dbm-style hash
|
14
|
-
[ 'gdbm', 'dbm'].each do | dbm |
|
15
|
-
begin
|
16
|
-
require dbm
|
17
|
-
@@dbmclass = Module.const_get(dbm.upcase)
|
18
|
-
rescue
|
19
|
-
next
|
20
|
-
end
|
21
|
-
break
|
22
|
-
end
|
23
|
-
|
24
|
-
if @@dbmclass.nil?
|
25
|
-
raise LoadError,
|
26
|
-
"no dbm class available for Lingua::EN::Syllable::Dictionary"
|
27
|
-
end
|
28
|
-
|
29
|
-
# Look up word in the dbm dictionary.
|
30
|
-
def Dictionary.syllables(word)
|
31
|
-
if @@dictionary.nil?
|
32
|
-
load_dictionary
|
33
|
-
end
|
34
|
-
word = word.upcase
|
35
|
-
begin
|
36
|
-
pronounce = @@dictionary.fetch(word)
|
37
|
-
rescue IndexError
|
38
|
-
if word =~ /'/
|
39
|
-
word = word.delete "'"
|
40
|
-
retry
|
41
|
-
end
|
42
|
-
raise LookUpError, "word #{word} not in dictionary"
|
43
|
-
end
|
44
|
-
|
45
|
-
pronounce.split(/-/).grep(/^[AEIUO]/).length
|
46
|
-
end
|
47
|
-
|
48
|
-
def Dictionary.dictionary
|
49
|
-
if @@dictionary.nil?
|
50
|
-
load_dictionary
|
51
|
-
end
|
52
|
-
@@dictionary
|
53
|
-
end
|
54
|
-
|
55
|
-
# convert a text file dictionary into dbm files. Returns the file names
|
56
|
-
# of the created dbms.
|
57
|
-
def Dictionary.make_dictionary(source_file, output_dir)
|
58
|
-
begin
|
59
|
-
Dir.mkdir(output_dir)
|
60
|
-
rescue
|
61
|
-
end
|
62
|
-
|
63
|
-
# clean old dictionary dbms
|
64
|
-
Dir.foreach(output_dir) do | x |
|
65
|
-
next if x =~ /^\.\.?$/
|
66
|
-
File.unlink(File.join(output_dir, x))
|
67
|
-
end
|
68
|
-
|
69
|
-
dbm = @@dbmclass.new(File.join(output_dir, 'dict'))
|
70
|
-
|
71
|
-
begin
|
72
|
-
IO.foreach(source_file) do | line |
|
73
|
-
next if line !~ /^[A-Z]/
|
74
|
-
line.chomp!
|
75
|
-
(word, *phonemes) = line.split(/ ?/)
|
76
|
-
next if word =~ /\(\d\) ?$/ # ignore alternative pronunciations
|
77
|
-
dbm.store(word, phonemes.join("-"))
|
78
|
-
end
|
79
|
-
rescue
|
80
|
-
# close and clean up
|
81
|
-
dbm.close
|
82
|
-
Dir.foreach(output_dir) do | x |
|
83
|
-
next if x =~ /^\.\.?$/
|
84
|
-
File.unlink(File.join('dict', x))
|
85
|
-
end
|
86
|
-
# delete files
|
87
|
-
raise
|
88
|
-
end
|
89
|
-
|
90
|
-
dbm.close
|
91
|
-
|
92
|
-
Dir.entries(output_dir).collect { | x |
|
93
|
-
x =~ /^\.\.?$/ ? nil : File.join("dict", x)
|
94
|
-
}.compact
|
95
|
-
end
|
96
|
-
|
97
|
-
private
|
98
|
-
def Dictionary.load_dictionary
|
99
|
-
@@dictionary = @@dbmclass.new( __FILE__[0..-14] + 'dict')
|
100
|
-
if @@dictionary.keys.length.zero?
|
101
|
-
raise LoadError, "dictionary file not found"
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
107
|
-
end
|