spanish 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2010 Norman Clarke
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -0,0 +1,11 @@
1
+ # Spanish
2
+
3
+ This library provides (or rather, will provide) some linguistic tools for
4
+ working with Spanish words.
5
+
6
+ Plans include:
7
+
8
+ * Syllabify any Spanish word.
9
+ * Correctly conjugate any Spanish infinitive verb.
10
+ * Given a conjugated verb, identify its conjugation and infinitive.
11
+ * Correctly return gender, pluralization and agreement for nouns.
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+ require "phonology"
3
+ require File.expand_path("../spanish/orthography", __FILE__)
4
+ require File.expand_path("../spanish/phonology", __FILE__)
5
+ require File.expand_path("../spanish/syllable", __FILE__)
6
+
7
+ # This library provides some linguistic and orthographic tools for Spanish
8
+ # words.
9
+ module Spanish
10
+
11
+ extend self
12
+
13
+ # Returns an array of Spanish letters from string.
14
+ # Example:
15
+ # Spanish.letters("chillar")
16
+ # # => ["ch", "i", "ll", "a", "r"]
17
+ def letters(string)
18
+ string.scan(Orthography::LETTERS)
19
+ end
20
+
21
+ # Get the translated sounds for the string, after applying every rule in Phonology.rules. Note: the +rules+ argument is currently unused.
22
+ def get_sounds(string, *rules)
23
+ sequence = Orthography.translator.translate(string)
24
+ Phonology.rules.values.each do |rule|
25
+ sequence.apply_rule!(rule)
26
+ end
27
+ sequence
28
+ end
29
+
30
+ # Translate the Spanish string to the International Phonetic Alphabet.
31
+ # Example:
32
+ #
33
+ # Spanish.get_ipa("chavo")
34
+ # # => 't͡ʃaβo'
35
+ def get_ipa(string)
36
+ get_sounds(string).to_s
37
+ end
38
+
39
+ end
@@ -0,0 +1,112 @@
1
+ # encoding: utf-8
2
+ module Spanish
3
+ module Orthography
4
+
5
+ extend self
6
+
7
+ LETTERS = /ch|ll|ñ|á|é|í|ó|ú|ü|[\w]/
8
+
9
+ SCANNER = lambda {|string| string.downcase.scan(/rr|ch|ll|ñ|á|é|í|ó|ú|ü|\w/)}
10
+
11
+ SOUNDS = ::Phonology::Inventory.from_ipa(
12
+ "m", "n", "ɲ", "ŋ", "p", "b", "t", "d", "k", "ɡ", "β", "f", "v", "ð", "s",
13
+ "z", "ʒ", "ʝ", "x", "ɣ", "j", "r", "ɾ", "l", "w", "i", "u", "e", "o", "a",
14
+ "θ", "ʎ"
15
+ )
16
+
17
+ # Note that unvoiced dental fricative is assumed for "z" (and "c"). This is
18
+ # not out of bias towards Spain, but because it is the most
19
+ # information-rich reading of the orthography. The same applies to the
20
+ # preservation of a distinction between "y" and "ll". Yeísmo and other
21
+ # common phenomena can be derived later with phonological rules.
22
+ RULES = Proc.new {
23
+ vowel = ["a", "á", "e", "é", "i", "í", "o", "ó", "u", "ú", "ü"]
24
+ close_front_vowel = ["i", "e", "í", "é"]
25
+ non_close_front_vowel = vowel - close_front_vowel
26
+ case curr_char
27
+ when "á" then get(:voiced, :open, :front).hint(:primary_stress)
28
+ when "a" then get(:voiced, :open, :front)
29
+ when "b" then get(:voiced, :bilabial, :plosive)
30
+ when "c" then precedes(close_front_vowel) ? get(:unvoiced, :dental, :fricative) : get(:unvoiced, :velar, :plosive)
31
+ when "ch" then get([:alveolar, :plosive], [:postalveolar, :fricative])
32
+ when "d" then get(:voiced, :alveolar, :plosive)
33
+ when "é" then get(:close_mid, :front).hint(:primary_stress)
34
+ when "e" then get(:close_mid, :front)
35
+ when "f" then get(:unvoiced, :labiodental, :fricative)
36
+ when "g" then precedes(close_front_vowel) ? get(:unvoiced, :velar, :fricative) : get(:velar, :plosive, :voiced)
37
+ when "h" then anticipate {|sound| sound.hint(:syllable_boundary).orthography.insert(0, "h")}
38
+ when "í" then get(:close, :front).hint(:primary_stress)
39
+ when "i"
40
+ if precedes(vowel - ["i", "í"])
41
+ get(:palatal, :approximant)
42
+ else
43
+ get(:close, :front)
44
+ end
45
+ when "j" then get(:unvoiced, :velar, :fricative)
46
+ when "k" then get(:unvoiced, :velar, :plosive)
47
+ when "l" then get(:alveolar, :lateral_approximant)
48
+ when "ll" then get(:palatal, :lateral_approximant) or get(:voiced, :palatal, :fricative)
49
+ when "m" then get(:bilabial, :nasal, :voiced)
50
+ when "n" then get(:alveolar, :nasal)
51
+ when "ñ" then get(:palatal, :nasal)
52
+ when "ó" then get(:close_mid, :back).hint(:primary_stress)
53
+ when "o" then get(:close_mid, :back)
54
+ when "p" then get(:unvoiced, :bilabial, :plosive)
55
+ when "q" then get(:unvoiced, :velar, :plosive)
56
+ when "r" then initial? ? get(:trill) : get(:flap)
57
+ when "rr" then get(:trill)
58
+ when "s" then get(:unvoiced, :alveolar, :fricative)
59
+ when "t" then get(:unvoiced, :alveolar, :plosive)
60
+ when "ú"
61
+ if follows("q") or between("g", close_front_vowel)
62
+ orthography.insert(1, "ú") && nil
63
+ else
64
+ get(:close, :back).hint(:primary_stress)
65
+ end
66
+ when "u"
67
+ if follows("q") or between("g", close_front_vowel)
68
+ orthography.insert(1, "u") && nil
69
+ elsif follows(vowel - ["i", "í"]) || precedes(vowel)
70
+ get(:velar, :approximant)
71
+ else
72
+ get(:close, :back)
73
+ end
74
+ when "ü" then get(:velar, :approximant)
75
+ when "v" then get(:voiced, :bilabial, :plosive)
76
+ when "w" then get(:velar, :approximant)
77
+ when "x"
78
+ if initial?
79
+ get(:unvoiced, :velar, :fricative)
80
+ else
81
+ [get(:unvoiced, :velar, :plosive), Phonology::Sound.new("s")]
82
+ end
83
+
84
+ when "y"
85
+ if initial? && final?
86
+ get(:close, :front)
87
+ elsif final?
88
+ get(:palatal, :approximant)
89
+ elsif precedes(vowel)
90
+ get(:voiced, :palatal, :fricative)
91
+ elsif initial? and !precedes(vowel)
92
+ get(:close, :front)
93
+ elsif !precedes(vowel)
94
+ get(:palatal, :approximant)
95
+ end
96
+ when "z" then get(:unvoiced, :dental, :fricative)
97
+ end
98
+ }
99
+
100
+ # Get an instance of Phonology::OrthographyTranslator with scanner and sound
101
+ # inventory set for Spanish.
102
+ def translator
103
+ orth = ::Phonology::OrthographyTranslator.new
104
+ orth.scanner = SCANNER
105
+ orth.sounds = SOUNDS
106
+ orth.rules = RULES
107
+ orth
108
+ end
109
+
110
+ end
111
+
112
+ end
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
+ module Spanish
3
+
4
+ module Phonology
5
+
6
+ extend self
7
+
8
+ attr_reader :rules
9
+
10
+ @rules = {
11
+ :sprinantization => ::Phonology::Rule.new {
12
+ if voiced? and plosive? and !initial? and precedes(:vocoid) and !follows(:nasal)
13
+ if non_coronal?
14
+ add :fricative
15
+ elsif coronal? and !follows(:lateral_approximant)
16
+ add :fricative, :dental
17
+ end
18
+ end
19
+ },
20
+ :seseo => ::Phonology::Rule.new {
21
+ add :alveolar if dental? and unvoiced?
22
+ },
23
+ :voicing => ::Phonology::Rule.new {
24
+ voice if alveolar? and fricative? and precedes(:voiced, :non_vocoid)
25
+ }
26
+ }
27
+
28
+ def apply_rules(array)
29
+ rules.values.inject(array) {|result, rule| rule.apply(result)}
30
+ end
31
+
32
+ end
33
+
34
+ end
@@ -0,0 +1,175 @@
1
+ # @TODO
2
+ # Problem words: insubstancialidad, descouella
3
+ module Spanish
4
+ class Syllable
5
+
6
+ attr_accessor :onset, :coda, :nucleus, :stress
7
+
8
+ def initialize(sound = nil)
9
+ @onset = []
10
+ @nucleus = []
11
+ @coda = []
12
+ add sound if sound
13
+ end
14
+
15
+ def to_a
16
+ [onset, rime].flatten
17
+ end
18
+
19
+ def valid?
20
+ !nucleus.empty?
21
+ end
22
+
23
+ def rime
24
+ [nucleus, coda]
25
+ end
26
+
27
+ def to_s
28
+ string = to_a.map(&:symbol).join
29
+ (stress ? "\u02c8" : "") + string
30
+ end
31
+
32
+ def wants?(sound)
33
+ onset_wants?(sound) or nucleus_wants?(sound) or coda_wants?(sound)
34
+ end
35
+
36
+ def onset_wants?(sound)
37
+ if !nucleus.empty? || sound.vocalic?
38
+ false
39
+ elsif onset.empty?
40
+ sound.consonantal?
41
+ else
42
+ sound.liquid? || sound.approximant?
43
+ end
44
+ end
45
+
46
+ def nucleus_wants?(sound)
47
+ if !coda.empty? || nucleus.length == 2 || !sound.vocalic?
48
+ false
49
+ elsif nucleus.empty?
50
+ true
51
+ elsif nucleus.last != sound
52
+ !nucleus.last.hints.include?(:primary_stress) &&
53
+ !sound.hints.include?(:syllable_boundary) &&
54
+ (nucleus.last.close? || !nucleus.last.close? && sound.close?)
55
+ end
56
+ end
57
+
58
+ def coda_wants?(sound)
59
+ if nucleus.empty?
60
+ false
61
+ else
62
+ # Codas don't want a rising diphthong but will accept one at the end of words.
63
+ sound.consonantal? && !(sound.approximant? && sound.palatal?)
64
+ end
65
+ end
66
+
67
+ def <<(sound)
68
+ @stress = true if sound.hints.include?(:primary_stress)
69
+ if onset_wants?(sound)
70
+ @onset << sound
71
+ elsif nucleus_wants?(sound)
72
+ @nucleus << sound
73
+ else
74
+ @coda << sound
75
+ end
76
+ end
77
+ alias add <<
78
+
79
+ def empty?
80
+ onset.empty? && nucleus.empty? && coda.empty?
81
+ end
82
+
83
+ def self.apply_stress(syllables)
84
+ if syllables.detect {|s| s.stress}
85
+ elsif syllables.length == 1
86
+ syllables[0].stress = true
87
+ else
88
+ last = syllables.last.to_a
89
+ penult = syllables[-2].to_a
90
+ if last.last.vocalic? or last.last.nasal? or (last.last.alveolar? && last.last.fricative?)
91
+ syllables[-2].stress = true
92
+ else
93
+ syllables.last.stress = true
94
+ end
95
+ end
96
+ syllables
97
+ end
98
+
99
+ def self.syllabify(arg)
100
+ arg = arg.kind_of?(String) ? Spanish.get_sounds(arg) : arg
101
+ apply_stress(Syllables.new(arg).entries)
102
+ end
103
+
104
+ class Syllables
105
+
106
+ include Enumerable
107
+
108
+ attr :index, :sounds, :syllable
109
+
110
+ def initialize(sounds)
111
+ @sounds = sounds
112
+ end
113
+
114
+ def each(&block)
115
+ begin
116
+ sounds.each_index { |i| @index = i; append or do_yield(&block) }
117
+ do_yield(&block)
118
+ ensure
119
+ @index = 0
120
+ @syllable = nil
121
+ end
122
+ end
123
+
124
+ private
125
+
126
+ def do_yield(&block)
127
+ yield syllable
128
+ @syllable = Syllable.new(curr)
129
+ end
130
+
131
+ def syllable
132
+ @syllable ||= Syllable.new
133
+ end
134
+
135
+ def append
136
+ return if !curr
137
+ # A final consonant has nowhere else to go.
138
+ if !nex && curr.consonantal?
139
+ syllable << curr
140
+ # If there's no room in the syllable, we're forced to start a new one.
141
+ elsif !syllable.wants? curr
142
+ false
143
+ # Spanish has a strong aversion to syllable-initial consonant clusters
144
+ # beginning with "s".
145
+ elsif syllable.wants?(curr) && nex && curr.alveolar? && curr.fricative? && nex.non_vocoid?
146
+ syllable << curr
147
+ # Otherwise, the preference is to be the onset of a new syllable. This
148
+ # is only possible when the syllable we would create has sonority rising
149
+ # towards the nucleus.
150
+ elsif nex && syllable.valid? && (nex.sonority - curr.sonority > 1)
151
+ false
152
+ # Default action is to append to current syllable.
153
+ else
154
+ syllable << curr
155
+ end
156
+ end
157
+
158
+ def curr
159
+ @sounds[index]
160
+ end
161
+
162
+ def prev
163
+ @sounds[index - 1] unless index == 0
164
+ end
165
+
166
+ def nex
167
+ @sounds[index + 1]
168
+ end
169
+
170
+ end
171
+
172
+
173
+ end
174
+
175
+ end
@@ -0,0 +1,38 @@
1
+ module Spanish
2
+
3
+ module Features
4
+
5
+ extend self
6
+
7
+ attr_reader :person, :mood, :tense
8
+
9
+ # if no bits set, then first person
10
+ @person = {
11
+ :second => 1 << 0,
12
+ :third => 1 << 1,
13
+ :plural => 1 << 2,
14
+ :familiar => 1 << 3
15
+ }
16
+
17
+ # if no bits set, then indicative
18
+ @mood = {
19
+ :subjunctive => 1 << 0,
20
+ :progressive => 1 << 1,
21
+ :imperative => 1 << 2,
22
+ :negative => 1 << 3 # can only be combined with imperative
23
+ }
24
+
25
+ # if no bits set, then present
26
+ @tense = {
27
+ :preterite => 1 << 0,
28
+ :imperfect => 1 << 1,
29
+ :future => 1 << 2,
30
+ :conditional => 1 << 3
31
+ }
32
+
33
+ end
34
+
35
+ class Verb
36
+ end
37
+
38
+ end
@@ -0,0 +1,3 @@
1
+ module Spanish
2
+ VERSION = "0.0.2"
3
+ end
@@ -0,0 +1,69 @@
1
+ # encoding: utf-8
2
+ require File.expand_path("../test_helper", __FILE__)
3
+
4
+ class PhonologyTest < Test::Unit::TestCase
5
+
6
+ test "c as s or k" do
7
+ assert_sound "kasa", "casa"
8
+ assert_sound "bisi", "bici"
9
+ assert_sound "fɾanko", "franco"
10
+ end
11
+
12
+ test "x" do
13
+ assert_sound "eksamen", "examen"
14
+ end
15
+
16
+ test "g as ɣ, g, or x" do
17
+ assert_sound "ɡustaβo", "gustavo"
18
+ assert_sound "laɣo", "lago"
19
+ assert_sound "ximena", "gimena"
20
+ assert_sound "xeɾman", "germán"
21
+ end
22
+
23
+ test "sprirantization" do
24
+ assert_sound "alɣo", "algo"
25
+ assert_sound "ranɡo", "rango"
26
+ assert_sound "kaldo", "caldo"
27
+ assert_sound "kandaðo", "candado"
28
+ end
29
+
30
+ test "y" do
31
+ assert_sound "i", "y"
32
+ assert_sound "kaʝo", "cayó"
33
+ assert_sound "iɣwasu", "yguazú"
34
+ assert_sound "doj", "doy"
35
+ end
36
+
37
+ test "voicing" do
38
+ assert_sound "razɣo", "rasgo"
39
+ assert_sound "xazmin", "jazmín"
40
+ end
41
+
42
+ test "trilled r and flap r" do
43
+ assert_sound "ropa", "ropa"
44
+ assert_sound "foɾo", "foro"
45
+ assert_sound "foraɾ", "forrar"
46
+ end
47
+
48
+ test "q, g and u" do
49
+ assert_sound "ke", "que"
50
+ assert_sound "ɡwemes", "güemes"
51
+ assert_sound "ɡera", "guerra"
52
+ assert_sound "ɡiɲo", "guiño"
53
+ assert_sound "pinɡwino", "pingüino"
54
+ end
55
+
56
+ test "diphthongs" do
57
+ assert_sound "buo", "buho"
58
+ assert_sound "pua", "púa"
59
+ assert_sound "oeste", "oeste"
60
+ end
61
+
62
+ private
63
+
64
+ def assert_sound(expected, given)
65
+ assert_equal expected, Spanish.get_ipa(given)
66
+ assert_equal given.downcase, Spanish.get_sounds(given).map(&:orthography).join
67
+ end
68
+
69
+ end
@@ -0,0 +1,43 @@
1
+ # encoding: utf-8
2
+ require File.expand_path("../test_helper", __FILE__)
3
+
4
+ class SyllabificationTest < Test::Unit::TestCase
5
+
6
+ include Phonology
7
+ include Spanish
8
+
9
+ test "should add consonant to empty onset" do
10
+ s = Syllable.new
11
+ assert s.onset_wants? Sound.new("t")
12
+ assert s.onset_wants? Sound.new("l")
13
+ assert s.onset_wants? Sound.new("w")
14
+ end
15
+
16
+ test "should not add vowel to empty onsent" do
17
+ s = Syllable.new
18
+ assert !s.onset_wants?(Sound.new("o"))
19
+ end
20
+
21
+ test "can append consonant to onset if liquid" do
22
+ s = Syllable.new
23
+ s.onset << Sound.new("t")
24
+ assert s.onset_wants? Sound.new("l")
25
+ assert s.onset_wants? Sound.new("ɾ")
26
+ end
27
+
28
+ test "can append consonant to onset if approximant" do
29
+ s = Syllable.new
30
+ s.onset << Sound.new("k")
31
+ assert s.onset_wants? Sound.new("w")
32
+ end
33
+
34
+ test "can not append sound to onset if non-liquid and non-approximant" do
35
+ s = Syllable.new
36
+ s.onset << Sound.new("k")
37
+ assert !s.onset_wants?(Sound.new("n"))
38
+ assert !s.onset_wants?(Sound.new("r"))
39
+ assert !s.onset_wants?(Sound.new("s"))
40
+ assert !s.onset_wants?(Sound.new("a"))
41
+ end
42
+
43
+ end
@@ -0,0 +1,10 @@
1
+ require "test/unit"
2
+ require File.expand_path("../../lib/spanish", __FILE__)
3
+ include Spanish
4
+
5
+ Test::Unit::TestCase.extend Module.new {
6
+ def test(name, &block)
7
+ define_method("test_#{name.gsub(/[^a-z0-9]/i, "_")}".to_sym, &block)
8
+ end
9
+ alias should test
10
+ }
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spanish
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 2
9
+ version: 0.0.2
10
+ platform: ruby
11
+ authors:
12
+ - Norman Clarke
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-05-20 00:00:00 -03:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: phonology
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ - 0
30
+ - 5
31
+ version: 0.0.5
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ description: A Spanish phonology, orthography and grammar library for Ruby.
35
+ email: norman@njclarke.com
36
+ executables: []
37
+
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - lib/spanish.rb
44
+ - lib/spanish/orthography.rb
45
+ - lib/spanish/phonology.rb
46
+ - lib/spanish/syllable.rb
47
+ - lib/spanish/verb.rb
48
+ - lib/spanish/version.rb
49
+ - test/phonology_test.rb
50
+ - test/syllabification_test.rb
51
+ - test/test_helper.rb
52
+ - README.md
53
+ - LICENSE
54
+ has_rdoc: true
55
+ homepage: http://github.com/norman/spanish
56
+ licenses: []
57
+
58
+ post_install_message:
59
+ rdoc_options: []
60
+
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ segments:
68
+ - 1
69
+ - 9
70
+ version: "1.9"
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ segments:
76
+ - 0
77
+ version: "0"
78
+ requirements: []
79
+
80
+ rubyforge_project: "[none]"
81
+ rubygems_version: 1.3.6
82
+ signing_key:
83
+ specification_version: 3
84
+ summary: Spanish phonology, orthography and grammar library for Ruby.
85
+ test_files: []
86
+