indoor_voice 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5af440632e16925a955aadd1d1d788d0d07e9b32
4
- data.tar.gz: 75a3a8afb8df286336d56ec889d890e2a578b249
3
+ metadata.gz: 7c1f354160ec3defa55dd660399cc0963cdad5b6
4
+ data.tar.gz: b3db382d94fb98da2893a329a371cab996df1d21
5
5
  SHA512:
6
- metadata.gz: 1d0aeec7a2dd9b28bff8f1c390bbe3789d170b9ebae70302ba8fe43c48dc893f3a15c2fc679116f3433e3b78de9b8537d8cdcf214551c00e1f5b64afc72d3efe
7
- data.tar.gz: 508cd37f8ebb3e6e247d004e3a1cdcf1932fa64f2d019a2f7b4d57787561272453726cbca4a97a3308f44d02de324d10d01c641f25f48318736af7a1318e6f6f
6
+ metadata.gz: e5fd5ed460d7a47b4c63d2f8d0b3c423e81249c8855d4a3c5eede696eb1a9f6f697b9142b07fa8a8ff441f05a03c09d5284ae33dff93a6a694cdc8f0a5e70b46
7
+ data.tar.gz: 22d03b2b980d189f8ffe4f1ccd6bc062d787f4147627da04efabfdf19a76f45e6d6b00e9c1c6b7d3eae5bea0e01e3d79558732d6470acc7aa2e34fbf0e891de0
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2014 James McKinney
1
+ Copyright (c) 2015 James McKinney
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -15,16 +15,25 @@ require 'open-uri'
15
15
 
16
16
  require 'indoor_voice'
17
17
 
18
- # You can use any word list and any language. Here's we use Scrabble words.
18
+ # You can use any word list. Here we use Scrabble words.
19
19
  url = 'https://scrabblehelper.googlecode.com/svn/trunk/ScrabbleHelper/src/dictionaries/TWL06.txt'
20
20
  words = open(url).readlines.map(&:chomp)
21
21
 
22
- # :en is the BCP 47 code for English.
22
+ # You can use any language. :en is the BCP 47 code for English.
23
23
  model = IndoorVoice.new(words, :en)
24
- model.setup
24
+ model.setup # wait a moment
25
25
 
26
- model.downcase('HP AND IBM ARE TECHNOLOGY CORPORATIONS.')
27
- # => "HP and IBM are technology corporations."
26
+ model.downcase('HP, IBM AND MICROSOFT ARE TECHNOLOGY CORPORATIONS.')
27
+ # => "HP, IBM and microsoft are technology corporations."
28
+
29
+ model.titlecase('HP, IBM AND MICROSOFT ARE TECHNOLOGY CORPORATIONS.')
30
+ # => "HP, IBM And Microsoft Are Technology Corporations."
31
+
32
+ model.titlecase('HP, IBM AND MICROSOFT ARE TECHNOLOGY CORPORATIONS.', except: %w(a an and as at but by en for if in of on or the to via))
33
+ # => "HP, IBM and Microsoft Are Technology Corporations."
34
+
35
+ model.titlecase('HP, IBM AND MICROSOFT ARE TECHNOLOGY CORPORATIONS.', except: words)
36
+ # => "HP, IBM and Microsoft are technology corporations."
28
37
  ```
29
38
 
30
39
  ![This gem is magic.](http://i.giphy.com/ol57TlMlftsQg.gif)
@@ -41,4 +50,4 @@ No gem for titlecasing dealt with acronyms well. In case this gem doesn't suit y
41
50
  * [namecase](https://rubygems.org/gems/namecase), based on [Lingua::EN::NameCase](http://search.cpan.org/~barbie/Lingua-EN-NameCase-1.19/lib/Lingua/EN/NameCase.pm) by Mark Summerfield
42
51
  * [clever_title](https://rubygems.org/gems/clever_title)
43
52
 
44
- Copyright (c) 2014 James McKinney, released under the MIT license
53
+ Copyright (c) 2015 James McKinney, released under the MIT license
data/lib/indoor_voice.rb CHANGED
@@ -1,7 +1,9 @@
1
1
  require 'set'
2
2
 
3
+ require 'unicode_utils/casefold'
3
4
  require 'unicode_utils/downcase'
4
5
  require 'unicode_utils/each_word'
6
+ require 'unicode_utils/titlecase'
5
7
  require 'unicode_utils/upcase'
6
8
 
7
9
  # Lowercases uppercase strings excluding acronyms.
@@ -12,10 +14,10 @@ class IndoorVoice
12
14
  # @return [Symbol] the language's BCP 47 code
13
15
  attr_reader :language_id
14
16
 
15
- # @return [Array<String>] the characters in the language
17
+ # @return [Set<String>] the characters in the language
16
18
  attr_reader :characters
17
19
 
18
- # @return [Array<Regexp>] regular expressions for non-word character sequences
20
+ # @return [Array<Regexp>] regular expressions matching non-word character sequences
19
21
  attr_reader :patterns
20
22
 
21
23
  # Excludes "\t", "\n", "\v", "\f", "\r", " " and "#".
@@ -107,4 +109,26 @@ class IndoorVoice
107
109
  end
108
110
  end.join
109
111
  end
112
+
113
+ # Titlecases all words except for acronyms, which are left unchanged, and any words listed in `:except`, which are downcased.
114
+ #
115
+ # @param [String] string a string
116
+ # @param [Hash] options optional arguments
117
+ # @option options [Array<String>] :except words to downcase, e.g. conjunctions
118
+ # @return [String] a string with acronyms left unchanged, `:except` words in lowercase and all other words in titlecase
119
+ def titlecase(string, options = {})
120
+ blacklist = options.fetch(:except, []).map do |word|
121
+ UnicodeUtils.casefold(word)
122
+ end
123
+
124
+ UnicodeUtils.each_word(string).map do |word|
125
+ if @patterns.any?{|pattern| word[pattern]}
126
+ word
127
+ elsif blacklist.include?(UnicodeUtils.casefold(word))
128
+ UnicodeUtils.downcase(word, @language_id)
129
+ else
130
+ UnicodeUtils.titlecase(word, @language_id)
131
+ end
132
+ end.join
133
+ end
110
134
  end
@@ -1,3 +1,3 @@
1
1
  class IndoorVoice
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -1,20 +1,48 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe IndoorVoice do
4
- describe '#downcase' do
5
- # @see http://en.wikipedia.org/wiki/Most_common_words_in_English
6
- let :words do
7
- %w(the be to of and a in that have I it for not on with he as you do at this but his by from they we say her she or an will my one all would there their what so up out if about who get which go me when make can like time no just him know take people into year your good some could them see other than then now look only come its over think also back after use two how our work first well way even new want because any these give day most us)
4
+ # @see http://en.wikipedia.org/wiki/Most_common_words_in_English
5
+ let :words do
6
+ %w(the be to of and a in that have I it for not on with he as you do at this but his by from they we say her she or an will my one all would there their what so up out if about who get which go me when make can like time no just him know take people into year your good some could them see other than then now look only come its over think also back after use two how our work first well way even new want because any these give day most us)
7
+ end
8
+
9
+ let :model do
10
+ model = IndoorVoice.new(words, :en)
11
+ model.setup
12
+ model
13
+ end
14
+
15
+ describe '#words' do
16
+ it 'should return the words in the language' do
17
+ expect(model.words).to eq(words)
18
+ end
19
+ end
20
+
21
+ describe '#language_id' do
22
+ it 'should return the language ID' do
23
+ expect(model.language_id).to eq(:en)
8
24
  end
25
+ end
9
26
 
10
- let :model do
11
- model = IndoorVoice.new(words, :en)
12
- model.setup
13
- model
27
+ describe '#characters' do
28
+ it 'should return the characters in the language' do
29
+ expect(model.characters).to eq(Set.new(%w(A B C D E F G H I J K L M N O P R S T U V W Y)))
14
30
  end
31
+ end
15
32
 
33
+ describe '#downcase' do
16
34
  it 'should downcase only non-acronyms' do
17
- expect(model.downcase('THE CAT WILL BEAT THE ROBOT')).to eq('the cat will beat the ROBOT')
35
+ expect(model.downcase('THE CAT WILL BEAT THE ROBOT')).to eq('the cat will beat the ROBOT') # "cat" and "beat" are not in the word list
36
+ end
37
+ end
38
+
39
+ describe '#titlecase' do
40
+ it 'should titlecase only non-acronyms' do
41
+ expect(model.titlecase('THE CAT WILL BEAT THE ROBOT')).to eq('The Cat Will Beat The ROBOT')
42
+ end
43
+
44
+ it 'should titlecase only non-acronyms and non-exceptions' do
45
+ expect(model.titlecase('THE CAT WILL BEAT THE ROBOT', except: words)).to eq('the Cat will Beat the ROBOT')
18
46
  end
19
47
  end
20
48
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indoor_voice
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James McKinney
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-27 00:00:00.000000000 Z
11
+ date: 2015-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: unicode_utils