indoor_voice 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +15 -6
- data/lib/indoor_voice.rb +26 -2
- data/lib/indoor_voice/version.rb +1 -1
- data/spec/indoor_voice_spec.rb +37 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7c1f354160ec3defa55dd660399cc0963cdad5b6
|
4
|
+
data.tar.gz: b3db382d94fb98da2893a329a371cab996df1d21
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5fd5ed460d7a47b4c63d2f8d0b3c423e81249c8855d4a3c5eede696eb1a9f6f697b9142b07fa8a8ff441f05a03c09d5284ae33dff93a6a694cdc8f0a5e70b46
|
7
|
+
data.tar.gz: 22d03b2b980d189f8ffe4f1ccd6bc062d787f4147627da04efabfdf19a76f45e6d6b00e9c1c6b7d3eae5bea0e01e3d79558732d6470acc7aa2e34fbf0e891de0
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -15,16 +15,25 @@ require 'open-uri'
|
|
15
15
|
|
16
16
|
require 'indoor_voice'
|
17
17
|
|
18
|
-
# You can use any word list
|
18
|
+
# You can use any word list. Here we use Scrabble words.
|
19
19
|
url = 'https://scrabblehelper.googlecode.com/svn/trunk/ScrabbleHelper/src/dictionaries/TWL06.txt'
|
20
20
|
words = open(url).readlines.map(&:chomp)
|
21
21
|
|
22
|
-
# :en is the BCP 47 code for English.
|
22
|
+
# You can use any language. :en is the BCP 47 code for English.
|
23
23
|
model = IndoorVoice.new(words, :en)
|
24
|
-
model.setup
|
24
|
+
model.setup # wait a moment
|
25
25
|
|
26
|
-
model.downcase('HP AND
|
27
|
-
# => "HP and
|
26
|
+
model.downcase('HP, IBM AND MICROSOFT ARE TECHNOLOGY CORPORATIONS.')
|
27
|
+
# => "HP, IBM and microsoft are technology corporations."
|
28
|
+
|
29
|
+
model.titlecase('HP, IBM AND MICROSOFT ARE TECHNOLOGY CORPORATIONS.')
|
30
|
+
# => "HP, IBM And Microsoft Are Technology Corporations."
|
31
|
+
|
32
|
+
model.titlecase('HP, IBM AND MICROSOFT ARE TECHNOLOGY CORPORATIONS.', except: %w(a an and as at but by en for if in of on or the to via))
|
33
|
+
# => "HP, IBM and Microsoft Are Technology Corporations."
|
34
|
+
|
35
|
+
model.titlecase('HP, IBM AND MICROSOFT ARE TECHNOLOGY CORPORATIONS.', except: words)
|
36
|
+
# => "HP, IBM and Microsoft are technology corporations."
|
28
37
|
```
|
29
38
|
|
30
39
|
![This gem is magic.](http://i.giphy.com/ol57TlMlftsQg.gif)
|
@@ -41,4 +50,4 @@ No gem for titlecasing dealt with acronyms well. In case this gem doesn't suit y
|
|
41
50
|
* [namecase](https://rubygems.org/gems/namecase), based on [Lingua::EN::NameCase](http://search.cpan.org/~barbie/Lingua-EN-NameCase-1.19/lib/Lingua/EN/NameCase.pm) by Mark Summerfield
|
42
51
|
* [clever_title](https://rubygems.org/gems/clever_title)
|
43
52
|
|
44
|
-
Copyright (c)
|
53
|
+
Copyright (c) 2015 James McKinney, released under the MIT license
|
data/lib/indoor_voice.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require 'set'
|
2
2
|
|
3
|
+
require 'unicode_utils/casefold'
|
3
4
|
require 'unicode_utils/downcase'
|
4
5
|
require 'unicode_utils/each_word'
|
6
|
+
require 'unicode_utils/titlecase'
|
5
7
|
require 'unicode_utils/upcase'
|
6
8
|
|
7
9
|
# Lowercases uppercase strings excluding acronyms.
|
@@ -12,10 +14,10 @@ class IndoorVoice
|
|
12
14
|
# @return [Symbol] the language's BCP 47 code
|
13
15
|
attr_reader :language_id
|
14
16
|
|
15
|
-
# @return [
|
17
|
+
# @return [Set<String>] the characters in the language
|
16
18
|
attr_reader :characters
|
17
19
|
|
18
|
-
# @return [Array<Regexp>] regular expressions
|
20
|
+
# @return [Array<Regexp>] regular expressions matching non-word character sequences
|
19
21
|
attr_reader :patterns
|
20
22
|
|
21
23
|
# Excludes "\t", "\n", "\v", "\f", "\r", " " and "#".
|
@@ -107,4 +109,26 @@ class IndoorVoice
|
|
107
109
|
end
|
108
110
|
end.join
|
109
111
|
end
|
112
|
+
|
113
|
+
# Titlecases all words except for acronyms.
|
114
|
+
#
|
115
|
+
# @param [String] string a string
|
116
|
+
# @param [Hash] options optional arguments
|
117
|
+
# @option options [Array<String>] :except words to downcase, e.g. conjunctions
|
118
|
+
# @return [String] a string with acronyms in uppercase and others in titlecase
|
119
|
+
def titlecase(string, options = {})
|
120
|
+
blacklist = options.fetch(:except, []).map do |word|
|
121
|
+
UnicodeUtils.casefold(word)
|
122
|
+
end
|
123
|
+
|
124
|
+
UnicodeUtils.each_word(string).map do |word|
|
125
|
+
if @patterns.any?{|pattern| word[pattern]}
|
126
|
+
word
|
127
|
+
elsif blacklist.include?(UnicodeUtils.casefold(word))
|
128
|
+
UnicodeUtils.downcase(word, @language_id)
|
129
|
+
else
|
130
|
+
UnicodeUtils.titlecase(word, @language_id)
|
131
|
+
end
|
132
|
+
end.join
|
133
|
+
end
|
110
134
|
end
|
data/lib/indoor_voice/version.rb
CHANGED
data/spec/indoor_voice_spec.rb
CHANGED
@@ -1,20 +1,48 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe IndoorVoice do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
# @see http://en.wikipedia.org/wiki/Most_common_words_in_English
|
5
|
+
let :words do
|
6
|
+
%w(the be to of and a in that have I it for not on with he as you do at this but his by from they we say her she or an will my one all would there their what so up out if about who get which go me when make can like time no just him know take people into year your good some could them see other than then now look only come its over think also back after use two how our work first well way even new want because any these give day most us)
|
7
|
+
end
|
8
|
+
|
9
|
+
let :model do
|
10
|
+
model = IndoorVoice.new(words, :en)
|
11
|
+
model.setup
|
12
|
+
model
|
13
|
+
end
|
14
|
+
|
15
|
+
describe '#words' do
|
16
|
+
it 'should return the words in the language' do
|
17
|
+
expect(model.words).to eq(words)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
describe '#language_id' do
|
22
|
+
it 'should return the language ID' do
|
23
|
+
expect(model.language_id).to eq(:en)
|
8
24
|
end
|
25
|
+
end
|
9
26
|
|
10
|
-
|
11
|
-
|
12
|
-
model.
|
13
|
-
model
|
27
|
+
describe '#characters' do
|
28
|
+
it 'should return the characters in the language' do
|
29
|
+
expect(model.characters).to eq(Set.new(%w(A B C D E F G H I J K L M N O P R S T U V W Y)))
|
14
30
|
end
|
31
|
+
end
|
15
32
|
|
33
|
+
describe '#downcase' do
|
16
34
|
it 'should downcase only non-acronyms' do
|
17
|
-
expect(model.downcase('THE CAT WILL BEAT THE ROBOT')).to eq('the cat will beat the ROBOT')
|
35
|
+
expect(model.downcase('THE CAT WILL BEAT THE ROBOT')).to eq('the cat will beat the ROBOT') # "cat" and "beat" are not in the word list
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
describe '#titlecase' do
|
40
|
+
it 'should titlecase only non-acronyms' do
|
41
|
+
expect(model.titlecase('THE CAT WILL BEAT THE ROBOT')).to eq('The Cat Will Beat The ROBOT')
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'should titlecase only non-acronyms and non-exceptions' do
|
45
|
+
expect(model.titlecase('THE CAT WILL BEAT THE ROBOT', except: words)).to eq('the Cat will Beat the ROBOT')
|
18
46
|
end
|
19
47
|
end
|
20
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: indoor_voice
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James McKinney
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: unicode_utils
|