rhopalic 0.0.1.pre

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ lib/bundler/man
9
+ pkg
10
+ rdoc
11
+ spec/reports
12
+ test/tmp
13
+ test/version_tmp
14
+ tmp
15
+
16
+ # YARD artifacts
17
+ .yardoc
18
+ _yardoc
19
+ doc/
20
+
21
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Fetch gems over HTTPS rather than plain HTTP so downloads are
# transport-protected.
source "https://rubygems.org"

# Dependencies are declared in rhopalic.gemspec.
gemspec
data/LICENSE.md ADDED
@@ -0,0 +1,7 @@
1
+ Copyright (c) 2013 Nik Haldimann
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ rhopalic
2
+ ========
3
+
4
+ A Ruby library to detect rhopalic phrases in English, i.e., phrases in which each
5
+ word contains one letter or one syllable more than the previous word.
6
+
7
+ ## Usage
8
+
9
+ For simple yes or no answers about whether a phrase is rhopalic:
10
+
11
+ > require 'rhopalic'
12
+ => true
13
+ > Rhopalic.letter_rhopalic?("I do not know where family doctors acquired illegibly perplexing handwriting.")
14
+ => true
15
+ > Rhopalic.syllable_rhopalic?("Lines thicken approaching termination.")
16
+ => true
17
+
18
+ There is no exact algorithm for counting syllables in English. For more accurate syllable
19
+ counting use the [CMU pronunciation dictionary](http://www.speech.cs.cmu.edu/cgi-bin/cmudict):
20
+
21
+ > require 'rhopalic'
22
+ > require 'rhopalic/dictionary'
23
+ > dict = Rhopalic::Dictionary.from_file('cmudict.0.7a')
24
+ => ...
25
+ > analysis = Rhopalic::Analysis.new(dict)
26
+ => ...
27
+ > analysis.analyze_phrase("Add extra syllables gradually").syllable_rhopalic?
28
+ => true
29
+
30
+ (This assumes that the dictionary file cmudict.0.7a is in your working directory.)
31
+
32
+ ## License
33
+
34
+ Distributed under an [MIT license](https://github.com/nikhaldi/rhopalic-ruby/blob/master/LICENSE.md).
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
require 'bundler'
require 'rake/testtask'

# Register Bundler's gem helper tasks.
Bundler::GemHelper.install_tasks

# `rake test` runs every test/*_test.rb file, verbosely and with Ruby
# warnings switched on.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.test_files = FileList['test/*_test.rb']
  t.verbose = true
  t.warning = true
end

# Plain `rake` runs the test suite.
task :default => :test
@@ -0,0 +1,79 @@
1
+ require 'lingua'
2
+
3
+ require 'rhopalic/contractions'
4
+ require 'rhopalic/phrase'
5
+
6
module Rhopalic

  # Decides whether a phrase is rhopalic, i.e. whether every word has
  # exactly one letter (letter-rhopalic) or exactly one syllable
  # (syllable-rhopalic) more than the word before it.
  class Analysis

    # dictionary - optional object responding to syllable_count(word) and
    #              returning an Integer, or nil for unknown words. When it
    #              is absent, or does not know a word, the syllable count
    #              falls back to Lingua::EN::Syllable.
    def initialize(dictionary=nil)
      @dictionary = dictionary
    end

    # Analyzes the given phrase.
    #
    # Returns a Phrase carrying the words, their start indices, syllable
    # counts and dictionary flags if the phrase is letter-rhopalic and/or
    # syllable-rhopalic. Returns nil as soon as it is clear that the phrase
    # is neither.
    def analyze_phrase(phrase)
      words = []
      indices = []
      syllable_counts = []
      in_dictionary = []
      is_letter_rhopalic = true
      is_syllable_rhopalic = true

      # Contractions and possessives are merged into single tokens BEFORE any
      # rhopalic check runs. Checking the bare base word first (e.g. "it"
      # ahead of "it's") would reject phrases such as "the it's fancy" on the
      # strength of a word that is not actually part of the phrase.
      tokenize(phrase).each do |token|
        syllable_count, known = count_syllables(token)

        words.push(token[:word])
        indices.push(token[:index])
        syllable_counts.push(syllable_count)
        in_dictionary.push(known)

        is_letter_rhopalic = false unless word_sequence_rhopalic?(words)
        is_syllable_rhopalic = false unless syllable_sequence_rhopalic?(syllable_counts)
        return nil unless is_letter_rhopalic || is_syllable_rhopalic
      end

      Phrase.new(phrase, is_letter_rhopalic, is_syllable_rhopalic, words, indices,
          syllable_counts, in_dictionary)
    end

    private

    # Splits the phrase into word tokens. Each token is a Hash with:
    #   :word                  - the text of the word, including any merged
    #                            "'suffix" (so "it's" counts four characters)
    #   :base                  - the word without merged suffixes; this is
    #                            what syllables are looked up for
    #   :index                 - start offset of the word within the phrase
    #   :contraction_syllables - syllable count from CONTRACTIONS, or nil
    def tokenize(phrase)
      tokens = []

      # TODO this word definition is too simple. Needs to handle:
      # - numbers
      phrase.scan(/[[:alpha:]]+/) do
        match = Regexp.last_match
        word = match[0]
        index = match.begin(0)

        # Checking whether the previous and this word form a known contraction
        # or possessive: they must be separated by exactly one apostrophe.
        previous = tokens.last
        if previous
          previous_end = previous[:index] + previous[:word].length
          if phrase[previous_end] == "'" && index == previous_end + 1
            contraction = previous[:word] + "'" + word
            known_count = CONTRACTIONS[contraction.downcase]
            if known_count || word.downcase == "s"
              previous[:word] = contraction
              # A plain possessive keeps the syllable count of its base word.
              previous[:contraction_syllables] = known_count if known_count
              next
            end
          end
        end

        tokens.push(:word => word, :base => word, :index => index,
            :contraction_syllables => nil)
      end

      tokens
    end

    # Returns [syllable_count, in_dictionary] for a token. Known contractions
    # and words found in the dictionary are flagged true; counts estimated by
    # Lingua are flagged false.
    def count_syllables(token)
      return [token[:contraction_syllables], true] if token[:contraction_syllables]

      if @dictionary
        count = @dictionary.syllable_count(token[:base])
        return [count, true] unless count.nil?
      end

      [Lingua::EN::Syllable.syllables(token[:base]), false]
    end

    # True if the last word is exactly one character longer than the one
    # before it (trivially true for fewer than two words).
    def word_sequence_rhopalic?(words)
      words.size < 2 || words[-2].length + 1 == words[-1].length
    end

    # True if the last count is exactly one more than the one before it
    # (trivially true for fewer than two counts).
    def syllable_sequence_rhopalic?(syllable_counts)
      syllable_counts.size < 2 || syllable_counts[-2] + 1 == syllable_counts[-1]
    end
  end
end
@@ -0,0 +1,63 @@
1
module Rhopalic

  # Known contractions in English mapped to syllable counts. Keys are
  # lower case; callers downcase before looking a contraction up. The hash
  # is frozen so this shared table cannot be modified by accident.
  CONTRACTIONS = {
    "aren't" => 1,
    "can't" => 1,
    "couldn't" => 2,
    "didn't" => 2,
    "doesn't" => 2,
    "don't" => 1,
    "hadn't" => 2,
    "hasn't" => 2,
    "haven't" => 2,
    "he'd" => 1,
    "he'll" => 1,
    "he's" => 1,
    "i'd" => 1,
    "i'll" => 1,
    "i'm" => 1,
    "i've" => 1,
    "isn't" => 2,
    "it'll" => 2,
    "it's" => 1,
    "let's" => 1,
    "mightn't" => 2,
    "mustn't" => 2,
    "shan't" => 1,
    "she'd" => 1,
    "she'll" => 1,
    "she's" => 1,
    "shouldn't" => 2,
    "that's" => 1,
    "there'd" => 1,
    "there'll" => 1,
    "there's" => 1,
    "they'd" => 1,
    "they'll" => 1,
    "they're" => 1,
    "they've" => 1,
    "we'd" => 1,
    "we're" => 1,
    "we've" => 1,
    "weren't" => 2,
    "what'll" => 2,
    "what're" => 2,
    "what's" => 1,
    "what've" => 2,
    "where's" => 1,
    "who'd" => 1,
    "who'll" => 1,
    "who're" => 1,
    "who's" => 1,
    "who've" => 1,
    "won't" => 1,
    "wouldn't" => 2,
    "y'all" => 1,
    "you'd" => 1,
    "you'll" => 1,
    "you're" => 1,
    "you've" => 1,
  }.freeze

end
@@ -0,0 +1,44 @@
1
module Rhopalic

  # A dictionary that maps words to their number of syllables. Can be
  # passed into Rhopalic::Analysis to improve accuracy of syllable
  # detection. Input is a pronunciation file in the format of the CMU
  # pronunciation dictionary. See
  # http://www.speech.cs.cmu.edu/cgi-bin/cmudict for details. The latest
  # dictionary file from CMU should work out of the box.
  class Dictionary

    # Builds a dictionary from the pronunciation file at the given path.
    # The file is read completely and closed again before this returns.
    def self.from_file(filename)
      File.open(filename) { |file| new(file) }
    end

    # Initializes a dictionary from an enumerable source of dictionary
    # entries, e.g., an open dictionary file or an array of lines. The
    # entries themselves are not modified.
    def initialize(input_source)
      @input_source = input_source
      make_dictionary
    end

    # Returns the number of syllables of the given word (looked up case
    # insensitively), or nil if the word is not in the dictionary.
    def syllable_count(word)
      @syllable_counts[word.upcase]
    end

    private

    # Reads every entry of the input source into @syllable_counts.
    def make_dictionary
      @syllable_counts = {}
      @input_source.each do |line|
        # Entries start with a letter; anything else (comments, punctuation
        # entries, blank lines) is skipped.
        next if line !~ /^[A-Z]/i
        # Non-destructive chomp: the caller's strings must stay untouched,
        # and may well be frozen.
        (word, *phonemes) = line.chomp.split(/ +/)
        next if word[-1] == ")" # ignore alternative pronunciations
        # Each phoneme starting with a vowel letter counts as one syllable.
        @syllable_counts[word.upcase] = phonemes.grep(/^[AEIOU]/i).length
      end
    end
  end

end
@@ -0,0 +1,31 @@
1
+ require 'lingua'
2
+
3
module Rhopalic

  # A rhopalic phrase with all the artifacts from the rhopalic analysis.
  #
  # words, indices, syllable_counts and in_dictionary are parallel arrays
  # with one entry per word of the phrase:
  #   words           - the words themselves
  #   indices         - start offset of each word within the phrase
  #   syllable_counts - number of syllables of each word
  #   in_dictionary   - whether the syllable count came from a known source
  #                     rather than an estimate
  class Phrase

    attr_reader :phrase, :is_letter_rhopalic, :is_syllable_rhopalic, :words, :indices,
        :syllable_counts, :in_dictionary
    alias_method :letter_rhopalic?, :is_letter_rhopalic
    alias_method :syllable_rhopalic?, :is_syllable_rhopalic

    def initialize(phrase, is_letter_rhopalic, is_syllable_rhopalic, words, indices,
        syllable_counts, in_dictionary)
      @phrase = phrase
      @is_letter_rhopalic = is_letter_rhopalic
      @is_syllable_rhopalic = is_syllable_rhopalic
      @words = words
      @indices = indices
      @syllable_counts = syllable_counts
      @in_dictionary = in_dictionary
    end

    # Yields word, index, syllable count and dictionary flag for every word
    # of the phrase, in order. Without a block an Enumerator over the same
    # values is returned instead of raising LocalJumpError.
    def each_word
      return enum_for(:each_word) unless block_given?

      words.zip(indices, syllable_counts, in_dictionary) do |word, index, syllable_count, known|
        yield word, index, syllable_count, known
      end
    end
  end

end
@@ -0,0 +1,29 @@
1
+ require 'rhopalic/analysis'
2
+
3
module Rhopalic

  # Analyzes the phrase without a pronunciation dictionary. Returns the
  # analyzed phrase if it's rhopalic, nil otherwise.
  def self.analyze_phrase(phrase)
    analysis = Analysis.new
    analysis.analyze_phrase(phrase)
  end

  # True if every word of the given English phrase has exactly one letter
  # more than the word before it.
  def self.letter_rhopalic?(phrase)
    result = analyze_phrase(phrase)
    return false if result.nil?
    result.letter_rhopalic?
  end

  # True if every word of the given English phrase has exactly one
  # syllable more than the word before it.
  def self.syllable_rhopalic?(phrase)
    result = analyze_phrase(phrase)
    return false if result.nil?
    result.syllable_rhopalic?
  end

  # True if the given phrase is letter-rhopalic or syllable-rhopalic.
  def self.rhopalic?(phrase)
    rejected = analyze_phrase(phrase).nil?
    !rejected
  end

end
@@ -0,0 +1,3 @@
1
module Rhopalic
  # Version of the rhopalic gem; read by rhopalic.gemspec. Frozen so the
  # shared string cannot be modified in place.
  VERSION = '0.0.1.pre'.freeze
end
data/lib/rhopalic.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'rhopalic/rhopalic'
2
+ require 'rhopalic/version'
data/rhopalic.gemspec ADDED
@@ -0,0 +1,21 @@
1
# Put lib/ on the load path so the version constant can be required below.
$:.push File.expand_path("../lib", __FILE__)
require 'rhopalic/version'

Gem::Specification.new do |s|
  s.name = 'rhopalic'
  s.version = Rhopalic::VERSION
  s.platform = Gem::Platform::RUBY
  s.authors = ['Nik Haldimann']
  s.email = ['nhaldimann@gmail.com']
  s.homepage = 'https://github.com/nikhaldi/rhopalic-ruby'
  s.summary = 'Detects rhopalic phrases'
  s.description = 'Detects rhopalic phrases'
  # The project is distributed under the MIT license (see LICENSE.md);
  # declare it so the packaged gem metadata does not list no license.
  s.license = 'MIT'

  s.add_runtime_dependency 'lingua', '~> 0.6.2'
  s.add_development_dependency 'param_test', '~> 0.0.2'

  # The package contents are whatever git tracks.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ['lib']
end
@@ -0,0 +1,27 @@
1
+ require 'param_test'
2
+ require 'test/unit'
3
+
4
+ require 'rhopalic/analysis'
5
+ require 'rhopalic/dictionary'
6
+
7
# Checks that Rhopalic::Analysis reports which words had their syllable
# count taken from the supplied dictionary.
class Rhopalic::AnalysisTest < ActiveSupport::TestCase

  # Builds an analysis backed by a three-entry pronunciation dictionary.
  def setup
    entries = [
      "ATTENUATE AH0 T EH1 N Y UW0 EY2 T",
      "GRADUALLY G R AE1 JH UW0 AH0 L IY0",
      "WHILE W AY1 L",
    ]
    @dict = Rhopalic::Dictionary.new(entries)
    @analysis = Rhopalic::Analysis.new(@dict)
  end

  param_test "phrase %s has words in dictionary %s", [
    ["While shadows, lengthening, attenuate", [true, false, false, true]],
    ["Add extra syllables gradually", [false, false, false, true]],
  ] do |text, expected_flags|
    analyzed = @analysis.analyze_phrase(text)
    assert_not_nil analyzed
    assert analyzed.syllable_rhopalic?
    assert_equal expected_flags, analyzed.in_dictionary
  end
end
@@ -0,0 +1,24 @@
1
+ require 'test/unit'
2
+
3
+ require 'rhopalic/dictionary'
4
+
5
# Checks how Rhopalic::Dictionary parses pronunciation entries.
class Rhopalic::DictionaryTest < Test::Unit::TestCase

  # Comments, punctuation entries and alternative pronunciations must be
  # skipped; regular entries must be found regardless of case.
  def test_constructor
    dict = Rhopalic::Dictionary.new([
      ";;; comment",
      "\"QUOTE K W OW1 T",
      "ABATEMENT AH0 B EY1 T M AH0 N T",
      "CAFETERIA K AE2 F AH0 T IH1 R IY0 AH0",
      "CAFETERIA(1) K AE2 F AH0 T IH1 R IY0 AH0",
    ])

    assert_nil dict.syllable_count("foo")

    [["abatement", 3], ["Abatement", 3], ["cafeteria", 5]].each do |word, expected|
      assert_equal expected, dict.syllable_count(word)
    end

    [";;;", "\"quote", "cafeteria(1)"].each do |word|
      assert_nil dict.syllable_count(word)
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'test/unit'
2
+
3
+ require 'rhopalic/phrase'
4
+
5
# Checks the accessors and iteration of Rhopalic::Phrase.
class Rhopalic::PhraseTest < Test::Unit::TestCase

  # Every constructor argument must be readable through its accessor.
  def test_constructor_inits_accessors
    subject = Rhopalic::Phrase.new("one", true, true, ["one"], [0], [1], [false])

    assert_equal "one", subject.phrase
    assert subject.letter_rhopalic?
    assert subject.syllable_rhopalic?
    assert_equal ["one"], subject.words
    assert_equal [0], subject.indices
    assert_equal [1], subject.syllable_counts
    assert_equal [false], subject.in_dictionary
  end

  # each_word must yield the four per-word values, in phrase order.
  def test_each_word
    subject = Rhopalic::Phrase.new("one four", true, false, ["one", "four"], [0, 4], [1, 1],
        [false, false])

    yielded = []
    subject.each_word do |word, index, syllable_count, in_dictionary|
      yielded << [word, index, syllable_count, in_dictionary]
    end

    assert_equal [["one", 0, 1, false], ["four", 4, 1, false]], yielded
  end
end
@@ -0,0 +1,63 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'param_test'
4
+ require 'test/unit'
5
+
6
+ require 'rhopalic'
7
+
8
# Exercises the module-level convenience methods of Rhopalic, which run
# without a pronunciation dictionary.
class Rhopalic::RhopalicTest < ActiveSupport::TestCase

  param_test "phrase %s is letter-rhopalic", [
    "a",
    "a be",
    "the fine horse",
    "it's fancy",
    "IT'S FANCY",
    "it's'bleak matter",
    "rose's stencil",
  ] do |text|
    assert Rhopalic.letter_rhopalic?(text)
    assert Rhopalic.rhopalic?(text)
  end

  param_test "phrase %s is syllable-rhopalic", [
    "a",
    "Words along rhopalic pentameters",
    "Lines thicken approaching termination.",
    "it's fancy",
    "IT'S FANCY",
    "bloom beta",
    "phil's matter"
  ] do |text|
    assert Rhopalic.syllable_rhopalic?(text)
    assert Rhopalic.rhopalic?(text)
  end

  param_test "phrase %s is not rhopalic", [
    "a b",
    "be do",
    "a be ce",
    "a it's fancy",
  ] do |text|
    assert !Rhopalic.rhopalic?(text)
  end

  param_test "phrase %s with accented characters is rhopalic",
  ["the café"] do |text|
    assert Rhopalic.rhopalic?(text)
  end

  # A phrase that is neither kind of rhopalic yields no analysis at all.
  def test_analyze_phrase_not_rhopalic
    assert_nil Rhopalic.analyze_phrase("three two")
  end

  # A letter-rhopalic phrase yields the full analysis artifacts.
  def test_analyze_phrase
    analyzed = Rhopalic.analyze_phrase("two four")

    assert_equal "two four", analyzed.phrase
    assert analyzed.letter_rhopalic?
    assert !analyzed.syllable_rhopalic?
    assert_equal ["two", "four"], analyzed.words
    assert_equal [0, 4], analyzed.indices
    assert_equal [1, 1], analyzed.syllable_counts
  end
end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rhopalic
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1.pre
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - Nik Haldimann
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-10 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: lingua
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.6.2
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.6.2
30
+ - !ruby/object:Gem::Dependency
31
+ name: param_test
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 0.0.2
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 0.0.2
46
+ description: Detects rhopalic phrases
47
+ email:
48
+ - nhaldimann@gmail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - Gemfile
55
+ - LICENSE.md
56
+ - README.md
57
+ - Rakefile
58
+ - lib/rhopalic.rb
59
+ - lib/rhopalic/analysis.rb
60
+ - lib/rhopalic/contractions.rb
61
+ - lib/rhopalic/dictionary.rb
62
+ - lib/rhopalic/phrase.rb
63
+ - lib/rhopalic/rhopalic.rb
64
+ - lib/rhopalic/version.rb
65
+ - rhopalic.gemspec
66
+ - test/analysis_test.rb
67
+ - test/dictionary_test.rb
68
+ - test/phrase_test.rb
69
+ - test/rhopalic_test.rb
70
+ homepage: https://github.com/nikhaldi/rhopalic-ruby
71
+ licenses: []
72
+ post_install_message:
73
+ rdoc_options: []
74
+ require_paths:
75
+ - lib
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>'
86
+ - !ruby/object:Gem::Version
87
+ version: 1.3.1
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 1.8.23
91
+ signing_key:
92
+ specification_version: 3
93
+ summary: Detects rhopalic phrases
94
+ test_files:
95
+ - test/analysis_test.rb
96
+ - test/dictionary_test.rb
97
+ - test/phrase_test.rb
98
+ - test/rhopalic_test.rb
99
+ has_rdoc: