spanish 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2010 Norman Clarke
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -0,0 +1,11 @@
1
+ # Spanish
2
+
3
+ This library provides (or rather, will provide) some linguistic tools for
4
+ working with Spanish words.
5
+
6
+ Plans include:
7
+
8
+ * Syllabify any Spanish word.
9
+ * Correctly conjugate any Spanish infinitive verb.
10
+ * Given a conjugated verb, identify its conjugation and infinitive.
11
+ * Correctly return gender, pluralization and agreement for nouns.
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+ require "phonology"
3
+ require File.expand_path("../spanish/orthography", __FILE__)
4
+ require File.expand_path("../spanish/phonology", __FILE__)
5
+ require File.expand_path("../spanish/syllable", __FILE__)
6
+
7
# This library provides some linguistic and orthographic tools for Spanish
# words.
#
# The module extends itself, so every method below is called directly on the
# module, e.g. Spanish.letters("chillar").
module Spanish

  extend self

  # Returns an array of Spanish letters from string, as matched by
  # Orthography::LETTERS (the digraphs "ch" and "ll" count as one letter).
  #
  # Example:
  #
  #   Spanish.letters("chillar")
  #   # => ["ch", "i", "ll", "a", "r"]
  def letters(string)
    string.scan(Orthography::LETTERS)
  end

  # Translates the string with Orthography.translator and then applies every
  # rule registered in Phonology.rules to the resulting sequence.
  #
  # Returns the (mutated) sequence produced by the translator.
  #
  # NOTE(review): the +rules+ splat is accepted for call compatibility but is
  # not consulted; all registered rules are always applied. Confirm the
  # intended meaning before wiring it up.
  def get_sounds(string, *rules)
    sequence = Orthography.translator.translate(string)
    # each_value walks the rule hash directly instead of building a
    # throwaway array with `values`.
    Phonology.rules.each_value { |rule| sequence.apply_rule!(rule) }
    sequence
  end

  # Translate the Spanish string to International Phonetic Alphabet.
  #
  # Example:
  #
  #   Spanish.get_ipa("chavo")
  #   # => "t͡ʃaβo"
  def get_ipa(string)
    get_sounds(string).to_s
  end

end
@@ -0,0 +1,112 @@
1
+ # encoding: utf-8
2
module Spanish
  # Spelling-to-sound mapping for Spanish: the letter scanner, the sound
  # inventory and the per-letter translation rules handed to
  # ::Phonology::OrthographyTranslator.
  module Orthography

    extend self

    # Matches one Spanish letter; "ch" and "ll" are listed first so the
    # digraphs are kept together.
    LETTERS = /ch|ll|ñ|á|é|í|ó|ú|ü|[\w]/

    # Downcases the string and splits it into orthographic units. Unlike
    # LETTERS, this also keeps "rr" together.
    SCANNER = lambda {|string| string.downcase.scan(/rr|ch|ll|ñ|á|é|í|ó|ú|ü|\w/)}

    SOUNDS = ::Phonology::Inventory.from_ipa(
      "m", "n", "ɲ", "ŋ", "p", "b", "t", "d", "k", "ɡ", "β", "f", "v", "ð", "s",
      "z", "ʒ", "ʝ", "x", "ɣ", "j", "r", "ɾ", "l", "w", "i", "u", "e", "o", "a",
      "θ", "ʎ"
    )

    # Note that unvoiced dental fricative is assumed for "z" (and "c"). This is
    # not out of bias towards Spain, but because it is the most
    # information-rich reading of the orthography. The same applies to the
    # preservation of a distinction between "y" and "ll". Yeísmo and other
    # common phenomena can be derived later with phonological rules.
    #
    # NOTE(review): this stays a Proc (not a lambda); helpers such as
    # curr_char, get, precedes, follows, between, initial?, final?,
    # anticipate and orthography are not defined here and are presumably
    # supplied by the translator that evaluates the block -- confirm against
    # the phonology gem.
    RULES = Proc.new {
      vowel = ["a", "á", "e", "é", "i", "í", "o", "ó", "u", "ú", "ü"]
      close_front_vowel = ["i", "e", "í", "é"]
      case curr_char
      when "á" then get(:voiced, :open, :front).hint(:primary_stress)
      when "a" then get(:voiced, :open, :front)
      when "b" then get(:voiced, :bilabial, :plosive)
      when "c" then precedes(close_front_vowel) ? get(:unvoiced, :dental, :fricative) : get(:unvoiced, :velar, :plosive)
      when "ch" then get([:alveolar, :plosive], [:postalveolar, :fricative])
      when "d" then get(:voiced, :alveolar, :plosive)
      when "é" then get(:close_mid, :front).hint(:primary_stress)
      when "e" then get(:close_mid, :front)
      when "f" then get(:unvoiced, :labiodental, :fricative)
      when "g" then precedes(close_front_vowel) ? get(:unvoiced, :velar, :fricative) : get(:velar, :plosive, :voiced)
      # "h" yields no sound of its own; the block passed to anticipate marks a
      # sound with a syllable-boundary hint and prefixes "h" to its spelling.
      when "h" then anticipate {|sound| sound.hint(:syllable_boundary).orthography.insert(0, "h")}
      when "í" then get(:close, :front).hint(:primary_stress)
      when "i"
        if precedes(vowel - ["i", "í"])
          get(:palatal, :approximant)
        else
          get(:close, :front)
        end
      when "j" then get(:unvoiced, :velar, :fricative)
      when "k" then get(:unvoiced, :velar, :plosive)
      when "l" then get(:alveolar, :lateral_approximant)
      when "ll" then get(:palatal, :lateral_approximant) || get(:voiced, :palatal, :fricative)
      when "m" then get(:bilabial, :nasal, :voiced)
      when "n" then get(:alveolar, :nasal)
      when "ñ" then get(:palatal, :nasal)
      when "ó" then get(:close_mid, :back).hint(:primary_stress)
      when "o" then get(:close_mid, :back)
      when "p" then get(:unvoiced, :bilabial, :plosive)
      when "q" then get(:unvoiced, :velar, :plosive)
      when "r" then initial? ? get(:trill) : get(:flap)
      when "rr" then get(:trill)
      when "s" then get(:unvoiced, :alveolar, :fricative)
      when "t" then get(:unvoiced, :alveolar, :plosive)
      when "ú"
        if follows("q") || between("g", close_front_vowel)
          # The letter is recorded in the orthography and the branch
          # evaluates to nil, i.e. no sound is returned for it.
          orthography.insert(1, "ú") && nil
        else
          get(:close, :back).hint(:primary_stress)
        end
      when "u"
        if follows("q") || between("g", close_front_vowel)
          # Same as for "ú": spelling is recorded, no sound is returned.
          orthography.insert(1, "u") && nil
        elsif follows(vowel - ["i", "í"]) || precedes(vowel)
          get(:velar, :approximant)
        else
          get(:close, :back)
        end
      when "ü" then get(:velar, :approximant)
      when "v" then get(:voiced, :bilabial, :plosive)
      when "w" then get(:velar, :approximant)
      when "x"
        if initial?
          get(:unvoiced, :velar, :fricative)
        else
          [get(:unvoiced, :velar, :plosive), Phonology::Sound.new("s")]
        end

      when "y"
        if initial? && final?
          get(:close, :front)
        elsif final?
          get(:palatal, :approximant)
        elsif precedes(vowel)
          get(:voiced, :palatal, :fricative)
        elsif initial? && !precedes(vowel)
          get(:close, :front)
        elsif !precedes(vowel)
          get(:palatal, :approximant)
        end
      when "z" then get(:unvoiced, :dental, :fricative)
      end
    }

    # Get an instance of Phonology::OrthographyTranslator with scanner, sound
    # inventory and rules set for Spanish. A new translator is built on every
    # call.
    def translator
      orth = ::Phonology::OrthographyTranslator.new
      orth.scanner = SCANNER
      orth.sounds = SOUNDS
      orth.rules = RULES
      orth
    end

  end

end
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
module Spanish

  # Phonological rules applied after the orthography has been translated to
  # sounds. The rules are kept in a hash keyed by name and exposed through
  # Spanish::Phonology.rules.
  module Phonology

    extend self

    attr_reader :rules

    @rules = {
      # Voiced plosives that are not initial, precede a vocoid and do not
      # follow a nasal gain :fricative; coronal ones (unless they follow a
      # lateral approximant) additionally gain :dental.
      #
      # NOTE(review): the key is misspelled ("spirantization"); it is left
      # as-is because callers may look the rule up by this name.
      :sprinantization => ::Phonology::Rule.new {
        if voiced? && plosive? && !initial? && precedes(:vocoid) && !follows(:nasal)
          if non_coronal?
            add :fricative
          elsif coronal? && !follows(:lateral_approximant)
            add :fricative, :dental
          end
        end
      },
      # Unvoiced dentals gain :alveolar.
      :seseo => ::Phonology::Rule.new {
        add :alveolar if dental? && unvoiced?
      },
      # Alveolar fricatives are voiced before a voiced non-vocoid.
      :voicing => ::Phonology::Rule.new {
        voice if alveolar? && fricative? && precedes(:voiced, :non_vocoid)
      }
    }

    # Threads +array+ through every registered rule in hash order, feeding
    # each rule's result to the next, and returns the final result.
    def apply_rules(array)
      rules.values.inject(array) {|result, rule| rule.apply(result)}
    end

  end

end
@@ -0,0 +1,175 @@
1
+ # @TODO
2
+ # Problem words: insubstancialidad, descouella
3
module Spanish
  # A single syllable, stored as three arrays of sounds: onset, nucleus and
  # coda. Sound objects are duck-typed; this class calls vocalic?,
  # consonantal?, liquid?, approximant?, palatal?, close?, nasal?, alveolar?,
  # fricative?, non_vocoid?, sonority, hints and symbol on them.
  class Syllable

    attr_accessor :onset, :coda, :nucleus, :stress

    # Creates an empty syllable, optionally seeded with one sound.
    def initialize(sound = nil)
      @onset = []
      @nucleus = []
      @coda = []
      add sound if sound
    end

    # All sounds of the syllable, in onset / nucleus / coda order.
    def to_a
      [onset, rime].flatten
    end

    # A syllable is valid once it has a nucleus.
    def valid?
      !nucleus.empty?
    end

    # Returns [nucleus, coda].
    def rime
      [nucleus, coda]
    end

    # The sounds' symbols joined together, prefixed with the IPA primary
    # stress mark when the syllable is stressed.
    def to_s
      string = to_a.map(&:symbol).join
      (stress ? "\u02c8" : "") + string
    end

    # True when the sound fits into the onset, nucleus or coda.
    def wants?(sound)
      onset_wants?(sound) || nucleus_wants?(sound) || coda_wants?(sound)
    end

    # The onset is closed once there is a nucleus and never takes vocalic
    # sounds. An empty onset takes any consonantal sound; a non-empty one
    # only takes liquids and approximants.
    def onset_wants?(sound)
      if !nucleus.empty? || sound.vocalic?
        false
      elsif onset.empty?
        sound.consonantal?
      else
        sound.liquid? || sound.approximant?
      end
    end

    # The nucleus takes at most two vocalic sounds and is closed once a coda
    # exists. A second sound is only accepted when it differs from the first,
    # the first is not stressed, the second does not carry a
    # syllable-boundary hint, and at least one of the two is close.
    def nucleus_wants?(sound)
      return false if !coda.empty? || nucleus.length == 2 || !sound.vocalic?
      return true if nucleus.empty?
      if nucleus.last != sound
        !nucleus.last.hints.include?(:primary_stress) &&
          !sound.hints.include?(:syllable_boundary) &&
          (nucleus.last.close? || sound.close?)
      else
        false
      end
    end

    def coda_wants?(sound)
      if nucleus.empty?
        false
      else
        # Codas don't want a rising diphthong but will accept one at the end of words.
        sound.consonantal? && !(sound.approximant? && sound.palatal?)
      end
    end

    # Appends the sound to the first part that wants it (onset, then
    # nucleus), falling back to the coda. Marks the syllable as stressed when
    # the sound carries a :primary_stress hint. Returns the array the sound
    # was appended to.
    def <<(sound)
      @stress = true if sound.hints.include?(:primary_stress)
      if onset_wants?(sound)
        @onset << sound
      elsif nucleus_wants?(sound)
        @nucleus << sound
      else
        @coda << sound
      end
    end
    alias add <<

    def empty?
      onset.empty? && nucleus.empty? && coda.empty?
    end

    # Marks one syllable as stressed unless one already is, and returns the
    # same array. A single syllable is always stressed. Otherwise the
    # penultimate syllable is stressed when the word ends in a vocalic sound,
    # a nasal or an alveolar fricative, and the last syllable in all other
    # cases. An empty array is returned untouched.
    def self.apply_stress(syllables)
      return syllables if syllables.empty? || syllables.any?(&:stress)
      if syllables.length == 1
        syllables.first.stress = true
      else
        final = syllables.last.to_a.last
        if final.vocalic? || final.nasal? || (final.alveolar? && final.fricative?)
          syllables[-2].stress = true
        else
          syllables.last.stress = true
        end
      end
      syllables
    end

    # Splits +arg+ into an array of stressed Syllable objects. A String is
    # first converted with Spanish.get_sounds; anything else is used as the
    # sound collection directly.
    def self.syllabify(arg)
      arg = arg.kind_of?(String) ? Spanish.get_sounds(arg) : arg
      apply_stress(Syllables.new(arg).entries)
    end

    # Enumerates the syllables of a sound collection. The collection must
    # respond to each_index and [].
    class Syllables

      include Enumerable

      attr_reader :index, :sounds

      def initialize(sounds)
        @sounds = sounds
      end

      # Yields each completed syllable in order. Empty syllables are never
      # yielded, so an empty sound collection yields nothing. Returns an
      # Enumerator when called without a block.
      def each
        return enum_for(:each) unless block_given?
        begin
          sounds.each_index do |i|
            @index = i
            next if append
            # The current sound could not join the syllable: emit what we
            # have and start the next syllable with that sound.
            yield syllable unless syllable.empty?
            @syllable = Syllable.new(curr)
          end
          yield syllable unless syllable.empty?
        ensure
          # Reset the iteration state so the object can be enumerated again.
          @index = 0
          @syllable = nil
        end
        self
      end

      private

      # The syllable currently being built.
      def syllable
        @syllable ||= Syllable.new
      end

      # Tries to add the current sound to the current syllable. Returns a
      # truthy value when the sound was added and nil/false when a new
      # syllable has to be started with it.
      def append
        return unless curr
        # Final consonantal has nowhere else to go.
        if !nex && curr.consonantal?
          syllable << curr
        # If there's no room in the syllable, we're forced to start a new one.
        elsif !syllable.wants?(curr)
          false
        # Spanish has a strong aversion to syllable-initial consonant clusters
        # beginning with "s". (The syllable is known to want the sound here,
        # because the previous branch did not match.)
        elsif nex && curr.alveolar? && curr.fricative? && nex.non_vocoid?
          syllable << curr
        # Otherwise, the preference is to be the onset of a new syllable. This
        # is only possible when the syllable we would create has sonority rising
        # towards the nucleus.
        elsif nex && syllable.valid? && (nex.sonority - curr.sonority > 1)
          false
        # Default action is to append to current syllable.
        else
          syllable << curr
        end
      end

      def curr
        @sounds[index]
      end

      def prev
        @sounds[index - 1] unless index == 0
      end

      def nex
        @sounds[index + 1]
      end

    end

  end

end
@@ -0,0 +1,38 @@
1
module Spanish

  # Bit-flag tables for describing verb forms. Each table maps a feature name
  # to a single bit; a value with none of a table's bits set stands for that
  # table's default (see the comment above each table). The tables are read
  # through Features.person, Features.mood and Features.tense and are frozen
  # so a caller cannot alter the shared definitions.
  module Features

    extend self

    attr_reader :person, :mood, :tense

    # if no bits set, then 1st
    @person = {
      :second   => 1 << 0,
      :third    => 1 << 1,
      :plural   => 1 << 2,
      :familiar => 1 << 3
    }.freeze

    # if no bits set, then indicative
    @mood = {
      :subjunctive => 1 << 0,
      :progressive => 1 << 1,
      :imperative  => 1 << 2,
      :negative    => 1 << 3 # can only be combined with imperative
    }.freeze

    # if no bits set, then present
    @tense = {
      :preterite   => 1 << 0,
      :imperfect   => 1 << 1,
      :future      => 1 << 2,
      :conditional => 1 << 3
    }.freeze

  end

  # Placeholder; no behavior is defined yet.
  class Verb
  end

end
@@ -0,0 +1,3 @@
1
module Spanish
  # Gem version string; frozen so the shared constant cannot be mutated.
  VERSION = "0.0.2".freeze
end
@@ -0,0 +1,69 @@
1
+ # encoding: utf-8
2
+ require File.expand_path("../test_helper", __FILE__)
3
+
4
# Checks the IPA transcription produced by Spanish.get_ipa and that the
# original spelling can be rebuilt from the sounds' orthography.
class PhonologyTest < Test::Unit::TestCase

  test "c as s or k" do
    assert_sound "kasa", "casa"
    assert_sound "bisi", "bici"
    assert_sound "fɾanko", "franco"
  end

  test "x" do
    assert_sound "eksamen", "examen"
  end

  test "g as ɣ, g, or x" do
    assert_sound "ɡustaβo", "gustavo"
    assert_sound "laɣo", "lago"
    assert_sound "ximena", "gimena"
    assert_sound "xeɾman", "germán"
  end

  test "spirantization" do
    assert_sound "alɣo", "algo"
    assert_sound "ranɡo", "rango"
    assert_sound "kaldo", "caldo"
    assert_sound "kandaðo", "candado"
  end

  test "y" do
    assert_sound "i", "y"
    assert_sound "kaʝo", "cayó"
    assert_sound "iɣwasu", "yguazú"
    assert_sound "doj", "doy"
  end

  test "voicing" do
    assert_sound "razɣo", "rasgo"
    assert_sound "xazmin", "jazmín"
  end

  test "trilled r and flap r" do
    assert_sound "ropa", "ropa"
    assert_sound "foɾo", "foro"
    assert_sound "foraɾ", "forrar"
  end

  test "q, g and u" do
    assert_sound "ke", "que"
    assert_sound "ɡwemes", "güemes"
    assert_sound "ɡera", "guerra"
    assert_sound "ɡiɲo", "guiño"
    assert_sound "pinɡwino", "pingüino"
  end

  test "diphthongs" do
    assert_sound "buo", "buho"
    assert_sound "pua", "púa"
    assert_sound "oeste", "oeste"
  end

  private

  # Asserts that +given+ is transcribed as +expected+, and that joining the
  # orthography of every sound gives back the downcased input.
  def assert_sound(expected, given)
    assert_equal expected, Spanish.get_ipa(given)
    assert_equal given.downcase, Spanish.get_sounds(given).map(&:orthography).join
  end

end
@@ -0,0 +1,43 @@
1
+ # encoding: utf-8
2
+ require File.expand_path("../test_helper", __FILE__)
3
+
4
# Exercises Syllable#onset_wants? for empty and partially filled onsets.
class SyllabificationTest < Test::Unit::TestCase

  include Phonology
  include Spanish

  test "should add consonant to empty onset" do
    s = Syllable.new
    assert s.onset_wants?(Sound.new("t"))
    assert s.onset_wants?(Sound.new("l"))
    assert s.onset_wants?(Sound.new("w"))
  end

  test "should not add vowel to empty onset" do
    s = Syllable.new
    assert !s.onset_wants?(Sound.new("o"))
  end

  test "can append consonant to onset if liquid" do
    s = Syllable.new
    s.onset << Sound.new("t")
    assert s.onset_wants?(Sound.new("l"))
    assert s.onset_wants?(Sound.new("ɾ"))
  end

  test "can append consonant to onset if approximant" do
    s = Syllable.new
    s.onset << Sound.new("k")
    assert s.onset_wants?(Sound.new("w"))
  end

  test "can not append sound to onset if non-liquid and non-approximant" do
    s = Syllable.new
    s.onset << Sound.new("k")
    assert !s.onset_wants?(Sound.new("n"))
    assert !s.onset_wants?(Sound.new("r"))
    assert !s.onset_wants?(Sound.new("s"))
    assert !s.onset_wants?(Sound.new("a"))
  end

end
@@ -0,0 +1,10 @@
1
+ require "test/unit"
2
+ require File.expand_path("../../lib/spanish", __FILE__)
3
+ include Spanish
4
+
5
+ Test::Unit::TestCase.extend Module.new { # Adds the class-level macros below to Test::Unit::TestCase.
6
+ def test(name, &block) # Defines an instance method named "test_" + name (every character outside a-z, 0-9, case-insensitive, replaced by "_") whose body is the block.
7
+ define_method("test_#{name.gsub(/[^a-z0-9]/i, "_")}".to_sym, &block)
8
+ end
9
+ alias should test # `should "..."` is a synonym for `test "..."`.
10
+ }
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spanish
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 2
9
+ version: 0.0.2
10
+ platform: ruby
11
+ authors:
12
+ - Norman Clarke
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-05-20 00:00:00 -03:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: phonology
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ - 0
30
+ - 5
31
+ version: 0.0.5
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ description: A Spanish phonology, orthography and grammar library for Ruby.
35
+ email: norman@njclarke.com
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - lib/spanish.rb
44
+ - lib/spanish/orthography.rb
45
+ - lib/spanish/phonology.rb
46
+ - lib/spanish/syllable.rb
47
+ - lib/spanish/verb.rb
48
+ - lib/spanish/version.rb
49
+ - test/phonology_test.rb
50
+ - test/syllabification_test.rb
51
+ - test/test_helper.rb
52
+ - README.md
53
+ - LICENSE
54
+ has_rdoc: true
55
+ homepage: http://github.com/norman/spanish
56
+ licenses: []
57
+
58
+ post_install_message:
59
+ rdoc_options: []
60
+
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ segments:
68
+ - 1
69
+ - 9
70
+ version: "1.9"
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ requirements: []
79
+
80
+ rubyforge_project: "[none]"
81
+ rubygems_version: 1.3.6
82
+ signing_key:
83
+ specification_version: 3
84
+ summary: Spanish phonology, orthography and grammar library for Ruby.
85
+ test_files: []
86
+