indoor_voice 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.travis.yml +6 -0
- data/.yardopts +4 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/README.md +44 -0
- data/Rakefile +16 -0
- data/USAGE +1 -0
- data/indoor_voice.gemspec +24 -0
- data/lib/indoor_voice.rb +110 -0
- data/lib/indoor_voice/version.rb +3 -0
- data/spec/indoor_voice_spec.rb +20 -0
- data/spec/spec_helper.rb +11 -0
- metadata +129 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5af440632e16925a955aadd1d1d788d0d07e9b32
|
4
|
+
data.tar.gz: 75a3a8afb8df286336d56ec889d890e2a578b249
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1d0aeec7a2dd9b28bff8f1c390bbe3789d170b9ebae70302ba8fe43c48dc893f3a15c2fc679116f3433e3b78de9b8537d8cdcf214551c00e1f5b64afc72d3efe
|
7
|
+
data.tar.gz: 508cd37f8ebb3e6e247d004e3a1cdcf1932fa64f2d019a2f7b4d57787561272453726cbca4a97a3308f44d02de324d10d01c641f25f48318736af7a1318e6f6f
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/.yardopts
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2014 James McKinney
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# IndoorVoice: Lowercase all-caps strings excluding acronyms
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/indoor_voice.svg)](https://badge.fury.io/rb/indoor_voice)
|
4
|
+
[![Build Status](https://secure.travis-ci.org/jpmckinney/indoor_voice.png)](https://travis-ci.org/jpmckinney/indoor_voice)
|
5
|
+
[![Dependency Status](https://gemnasium.com/jpmckinney/indoor_voice.png)](https://gemnasium.com/jpmckinney/indoor_voice)
|
6
|
+
[![Coverage Status](https://coveralls.io/repos/jpmckinney/indoor_voice/badge.png)](https://coveralls.io/r/jpmckinney/indoor_voice)
|
7
|
+
[![Code Climate](https://codeclimate.com/github/jpmckinney/indoor_voice.png)](https://codeclimate.com/github/jpmckinney/indoor_voice)
|
8
|
+
|
9
|
+
DOES YOUR DATA CONTAIN ALL-CAPS TEXT THAT YOU WISH WAS PROPERLY CASED?
|
10
|
+
|
11
|
+
Have your data use its indoor voice.
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
require 'open-uri'
|
15
|
+
|
16
|
+
require 'indoor_voice'
|
17
|
+
|
18
|
+
# You can use any word list and any language. Here we use Scrabble words.
|
19
|
+
url = 'https://scrabblehelper.googlecode.com/svn/trunk/ScrabbleHelper/src/dictionaries/TWL06.txt'
|
20
|
+
words = open(url).readlines.map(&:chomp)
|
21
|
+
|
22
|
+
# :en is the BCP 47 code for English.
|
23
|
+
model = IndoorVoice.new(words, :en)
|
24
|
+
model.setup
|
25
|
+
|
26
|
+
model.downcase('HP AND IBM ARE TECHNOLOGY CORPORATIONS.')
|
27
|
+
# => "HP and IBM are technology corporations."
|
28
|
+
```
|
29
|
+
|
30
|
+
![This gem is magic.](http://i.giphy.com/ol57TlMlftsQg.gif)
|
31
|
+
|
32
|
+
IndoorVoice is based on the assumption that most acronyms contain non-word character sequences. For example, no English word has the character sequence `bm` in a word-final position; therefore, `IBM` must be an acronym.
|
33
|
+
|
34
|
+
Once you have a string with only acronyms in uppercase, you can (in your own code) selectively uppercase letters, like the first letter in each sentence, or the first letter of each word. Since most titlecasing gems recase acronyms, titlecasing is a planned feature.
|
35
|
+
|
36
|
+
# Why?
|
37
|
+
|
38
|
+
No gem for titlecasing dealt with acronyms well. In case this gem doesn't suit your needs, see:
|
39
|
+
|
40
|
+
* [titleize](https://rubygems.org/gems/titleize), [titlecase](https://rubygems.org/gems/titlecase), [title_case](https://rubygems.org/gems/title_case) and [gruber-case](https://rubygems.org/gems/gruber-case), based on [TitleCase.pl](http://daringfireball.net/2008/05/title_case) by John Gruber
|
41
|
+
* [namecase](https://rubygems.org/gems/namecase), based on [Lingua::EN::NameCase](http://search.cpan.org/~barbie/Lingua-EN-NameCase-1.19/lib/Lingua/EN/NameCase.pm) by Mark Summerfield
|
42
|
+
* [clever_title](https://rubygems.org/gems/clever_title)
|
43
|
+
|
44
|
+
Copyright (c) 2014 James McKinney, released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
3
|
+
|
4
|
+
require 'rspec/core/rake_task'
|
5
|
+
RSpec::Core::RakeTask.new(:spec)
|
6
|
+
|
7
|
+
task :default => :spec
|
8
|
+
|
9
|
+
begin
|
10
|
+
require 'yard'
|
11
|
+
YARD::Rake::YardocTask.new
|
12
|
+
rescue LoadError
|
13
|
+
task :yard do
|
14
|
+
abort 'YARD is not available. In order to run yard, you must: gem install yard'
|
15
|
+
end
|
16
|
+
end
|
data/USAGE
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
See README.md for full usage details.
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/indoor_voice/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = "indoor_voice"
|
6
|
+
s.version = IndoorVoice::VERSION
|
7
|
+
s.platform = Gem::Platform::RUBY
|
8
|
+
s.authors = ["James McKinney"]
|
9
|
+
s.homepage = "https://github.com/jpmckinney/indoor_voice"
|
10
|
+
s.summary = %q{Lowercase all-caps strings excluding acronyms}
|
11
|
+
s.license = 'MIT'
|
12
|
+
|
13
|
+
s.files = `git ls-files`.split("\n")
|
14
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
15
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
16
|
+
s.require_paths = ["lib"]
|
17
|
+
|
18
|
+
s.add_runtime_dependency('unicode_utils', '~> 1.4.0')
|
19
|
+
|
20
|
+
s.add_development_dependency('coveralls')
|
21
|
+
s.add_development_dependency('json', '~> 1.8') # to silence coveralls warning
|
22
|
+
s.add_development_dependency('rake')
|
23
|
+
s.add_development_dependency('rspec', '~> 3.1')
|
24
|
+
end
|
data/lib/indoor_voice.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
require 'unicode_utils/downcase'
|
4
|
+
require 'unicode_utils/each_word'
|
5
|
+
require 'unicode_utils/upcase'
|
6
|
+
|
7
|
+
# Lowercases uppercase strings excluding acronyms.
|
8
|
+
class IndoorVoice
|
9
|
+
# @return [Array<String>] the words in the language
|
10
|
+
attr_reader :words
|
11
|
+
|
12
|
+
# @return [Symbol] the language's BCP 47 code
|
13
|
+
attr_reader :language_id
|
14
|
+
|
15
|
+
# @return [Set<String>] the uppercased characters that occur in the words
|
16
|
+
attr_reader :characters
|
17
|
+
|
18
|
+
# @return [Array<Regexp>] regular expressions for non-word character sequences
|
19
|
+
attr_reader :patterns
|
20
|
+
|
21
|
+
# Characters that require escaping in a regular expression. Excludes "\t", "\n", "\v", "\f", "\r", " " and "#".
|
22
|
+
# "(", ")", "*", "+", "?", "[" and "\\" can cause SyntaxError.
|
23
|
+
SPECIAL_CHARACTERS = Set.new(["$", "(", ")", "*", "+", "-", ".", "?", "[", "\\", "]", "^", "{", "|", "}"])
|
24
|
+
|
25
|
+
# @param [Array<String>] words the words in the language
|
26
|
+
# @param [Symbol] language_id the language's BCP 47 code
|
27
|
+
# @see http://tools.ietf.org/html/bcp47
|
28
|
+
def initialize(words, language_id = :en)
|
29
|
+
@words = words
|
30
|
+
@language_id = language_id
|
31
|
+
@characters = Set.new
|
32
|
+
@patterns = []
|
33
|
+
end
|
34
|
+
|
35
|
+
# Determines the regular expressions for non-word character sequences.
|
36
|
+
def setup
|
37
|
+
types = [:prefix, :suffix, :infix]
|
38
|
+
|
39
|
+
data = {}
|
40
|
+
types.each do |type|
|
41
|
+
data[type] = Hash.new do |hash,character|
|
42
|
+
hash[character] = Set.new
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# Determine the possible two-character sequences. Should be O(n) in the total number of characters across all words.
|
47
|
+
@words.each do |word|
|
48
|
+
chars = UnicodeUtils.upcase(word, @language_id).chars
|
49
|
+
@characters.merge(chars)
|
50
|
+
if chars.size > 1
|
51
|
+
data[:prefix][chars[0]].add(chars[1])
|
52
|
+
data[:suffix][chars[-2]].add(chars[-1])
|
53
|
+
chars[2..-2].each_with_index do |character,i|
|
54
|
+
data[:infix][chars[i + 1]].add(character)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# Determine whether the language contains any characters requiring escaping.
|
60
|
+
escape = @characters.intersect?(SPECIAL_CHARACTERS)
|
61
|
+
|
62
|
+
patterns = {}
|
63
|
+
types.each do |type|
|
64
|
+
patterns[type] = []
|
65
|
+
end
|
66
|
+
|
67
|
+
# Build the regular expressions for non-word character sequences.
|
68
|
+
data.each do |type,hash|
|
69
|
+
hash.each do |character,set|
|
70
|
+
difference = @characters - set
|
71
|
+
unless difference.empty?
|
72
|
+
# The first condition is just to make the patterns more human-readable.
|
73
|
+
patterns[type] << if difference.one?
|
74
|
+
if escape
|
75
|
+
"#{Regexp.escape(character)}#{Regexp.escape(difference.to_a.first)}"
|
76
|
+
else
|
77
|
+
"#{character}#{difference.to_a.first}"
|
78
|
+
end
|
79
|
+
else
|
80
|
+
if escape
|
81
|
+
"#{Regexp.escape(character)}[#{Regexp.escape(difference.to_a.join)}]"
|
82
|
+
else
|
83
|
+
"#{character}[#{difference.to_a.join}]"
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
@patterns = [
|
91
|
+
Regexp.new("^(?:#{patterns[:prefix].join('|')})"),
|
92
|
+
Regexp.new("(?:#{patterns[:suffix].join('|')})$"),
|
93
|
+
Regexp.new("\\B(?:#{patterns[:infix].join('|')})\\B"),
|
94
|
+
]
|
95
|
+
end
|
96
|
+
|
97
|
+
# Downcases all words except for acronyms.
|
98
|
+
#
|
99
|
+
# @param [String] string an uppercase string
|
100
|
+
# @return [String] a string with only acronyms in uppercase
|
101
|
+
def downcase(string)
|
102
|
+
UnicodeUtils.each_word(string).map do |word|
|
103
|
+
if @patterns.any?{|pattern| word[pattern]}
|
104
|
+
word
|
105
|
+
else
|
106
|
+
UnicodeUtils.downcase(word, @language_id)
|
107
|
+
end
|
108
|
+
end.join
|
109
|
+
end
|
110
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe IndoorVoice do
|
4
|
+
describe '#downcase' do
|
5
|
+
# @see http://en.wikipedia.org/wiki/Most_common_words_in_English
|
6
|
+
let :words do
|
7
|
+
%w(the be to of and a in that have I it for not on with he as you do at this but his by from they we say her she or an will my one all would there their what so up out if about who get which go me when make can like time no just him know take people into year your good some could them see other than then now look only come its over think also back after use two how our work first well way even new want because any these give day most us)
|
8
|
+
end
|
9
|
+
|
10
|
+
let :model do
|
11
|
+
model = IndoorVoice.new(words, :en)
|
12
|
+
model.setup
|
13
|
+
model
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should downcase only non-acronyms' do
|
17
|
+
expect(model.downcase('THE CAT WILL BEAT THE ROBOT')).to eq('the cat will beat the ROBOT')
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: indoor_voice
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James McKinney
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-02-27 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: unicode_utils
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.4.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.4.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: coveralls
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: json
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.8'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.8'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '3.1'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '3.1'
|
83
|
+
description:
|
84
|
+
email:
|
85
|
+
executables: []
|
86
|
+
extensions: []
|
87
|
+
extra_rdoc_files: []
|
88
|
+
files:
|
89
|
+
- ".gitignore"
|
90
|
+
- ".rspec"
|
91
|
+
- ".travis.yml"
|
92
|
+
- ".yardopts"
|
93
|
+
- Gemfile
|
94
|
+
- LICENSE
|
95
|
+
- README.md
|
96
|
+
- Rakefile
|
97
|
+
- USAGE
|
98
|
+
- indoor_voice.gemspec
|
99
|
+
- lib/indoor_voice.rb
|
100
|
+
- lib/indoor_voice/version.rb
|
101
|
+
- spec/indoor_voice_spec.rb
|
102
|
+
- spec/spec_helper.rb
|
103
|
+
homepage: https://github.com/jpmckinney/indoor_voice
|
104
|
+
licenses:
|
105
|
+
- MIT
|
106
|
+
metadata: {}
|
107
|
+
post_install_message:
|
108
|
+
rdoc_options: []
|
109
|
+
require_paths:
|
110
|
+
- lib
|
111
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '0'
|
116
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
|
+
requirements:
|
118
|
+
- - ">="
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
requirements: []
|
122
|
+
rubyforge_project:
|
123
|
+
rubygems_version: 2.2.2
|
124
|
+
signing_key:
|
125
|
+
specification_version: 4
|
126
|
+
summary: Lowercase all-caps strings excluding acronyms
|
127
|
+
test_files:
|
128
|
+
- spec/indoor_voice_spec.rb
|
129
|
+
- spec/spec_helper.rb
|