string_utils 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +30 -0
- data/README +8 -0
- data/Rakefile +9 -0
- data/lib/string_utils.rb +134 -0
- data/lib/string_utils/version.rb +3 -0
- data/string_utils.gemspec +34 -0
- data/test/normalize_name_spec.rb +19 -0
- data/test/truncate_spec.rb +50 -0
- metadata +128 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
string_utils (1.0.0)
|
5
|
+
activesupport (> 2)
|
6
|
+
i18n
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: http://rubygems.org/
|
10
|
+
specs:
|
11
|
+
activesupport (3.0.3)
|
12
|
+
diff-lcs (1.1.2)
|
13
|
+
i18n (0.5.0)
|
14
|
+
rspec (2.2.0)
|
15
|
+
rspec-core (~> 2.2)
|
16
|
+
rspec-expectations (~> 2.2)
|
17
|
+
rspec-mocks (~> 2.2)
|
18
|
+
rspec-core (2.2.1)
|
19
|
+
rspec-expectations (2.2.0)
|
20
|
+
diff-lcs (~> 1.1.2)
|
21
|
+
rspec-mocks (2.2.0)
|
22
|
+
|
23
|
+
PLATFORMS
|
24
|
+
ruby
|
25
|
+
|
26
|
+
DEPENDENCIES
|
27
|
+
activesupport (> 2)
|
28
|
+
i18n
|
29
|
+
rspec (>= 2.0.0)
|
30
|
+
string_utils!
|
data/README
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
StringUtils gem repo
|
2
|
+
|
3
|
+
StringUtils is a library that provides various string manipulation utilities.
|
4
|
+
|
5
|
+
Example usage:
|
6
|
+
|
7
|
+
* StringUtils.truncate("hello world", 10, "...") #=> "hello..."
|
8
|
+
* StringUtils.normalize_name "\302\240 Gran Via/Avda.de Asturias " #=> "Gran Via / Avda. de Asturias"
|
data/Rakefile
ADDED
data/lib/string_utils.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
if RUBY_VERSION < '1.9'
|
3
|
+
$KCODE = "UTF8" if $KCODE == "NONE"
|
4
|
+
require 'active_support/core_ext/string/multibyte'
|
5
|
+
end
|
6
|
+
|
7
|
+
|
8
|
+
# StringUtils is a library that provides various string manipulation utilities.
|
9
|
+
# Example usage:
|
10
|
+
# * StringUtils.truncate("hello world", 10, "...") #=> "hello..."
|
11
|
+
# * StringUtils.normalize_name "\302\240 Gran Via/Avda.de Asturias " #=> "Gran Via / Avda. de Asturias"
|
12
|
+
module StringUtils
  extend self

  # UTF-8 bytes of U+00A0 (no-break space), which a plain \s does not match.
  NBSP = "\302\240".freeze

  # Regexp source fragments. The backslash is escaped on purpose: inside a
  # double-quoted Ruby string "\s" is a literal space, not the regexp class \s,
  # so an unescaped "\s" would leave tabs and newlines unrecognised.
  WHITESPACE_MATCHER = "(?:\\s|#{NBSP})".freeze
  NOT_WHITESPACE     = "[^\\s#{NBSP}]".freeze

  # A single whitespace character (including NBSP).
  WHITESPACE  = /#{WHITESPACE_MATCHER}/
  # A run of one or more whitespace characters (including NBSP).
  WHITESPACES = /#{WHITESPACE_MATCHER}+/

  # Normalizes whitespace and punctuation spacing in a name.
  #
  #   "a , a" => "a, a"
  #   "a ,a"  => "a, a"
  #   "a,a"   => "a, a"
  #   "a/b" => "a / b", "a/ b" => "a / b", "a /b" => "a / b"
  #
  # Leading and trailing whitespace and [.,] are removed.
  # The argument itself is never modified; a new String is returned.
  #
  # Arguments:
  #   value   => the text to normalize (nil/false yields "")
  #   options => {:titleize => true (default false)}
  #              titleizes the result, keeping "of", "and" and "a" lower-case
  #              when they appear between other words
  def normalize_name(value, options = {})
    return "" unless value

    # Work on a copy: every step below edits in place.
    value = mb_charify(value.to_s.dup)

    # Collapse every whitespace run (spaces, tabs, newlines, NBSP) into one space
    value.gsub!(WHITESPACES, ' ')

    # Trim whitespace and [.,] from both ends in a single pass, so that
    # ". Here ." does not keep the spaces that sat next to the punctuation.
    value.gsub!(/\A(?:[.,]|#{WHITESPACE_MATCHER})+/, '')
    value.gsub!(/(?:[.,]|#{WHITESPACE_MATCHER})+\z/, '')

    # "a ,a" => "a, a"
    # "a,a" => "a, a"
    # "a , a" => "a, a"
    value.gsub!(/#{WHITESPACE_MATCHER}([,.])/, '\1')
    value.gsub!(/([,.])(#{NOT_WHITESPACE})/, '\1 \2')

    # "//" => "/"
    value.gsub!(/\/+/, '/')

    # "a/b" => "a / b", "a/ b" => "a / b", "a /b" => "a / b"
    value.gsub!(/(#{NOT_WHITESPACE})\//, '\1 /')
    value.gsub!(/\/(#{NOT_WHITESPACE})/, '/ \1')

    if options[:titleize]
      # titleize comes from ActiveSupport; load it lazily when nobody has yet.
      require 'active_support/core_ext/string/inflections' unless value.respond_to?(:titleize)
      value = value.titleize
      # The look-ahead leaves the trailing space unconsumed so that consecutive
      # small words ("Of And") are both matched; the match already contains
      # its leading space, so it is only down-cased, not re-padded.
      value.gsub!(/#{WHITESPACE_MATCHER}(?:Of|And|A)(?=#{WHITESPACE_MATCHER})/) { |m| m.downcase }
    end

    value.to_s
  end

  # Truncates the text to at most +:length+ characters without cutting a word
  # in the middle; +:omission+ is appended when something was removed and
  # counts towards the length.
  #
  # Calling conventions:
  #   truncate(text, length, omission)
  #   truncate(text, :length => Integer, :omission => String)
  #
  # Arguments:
  #   :length   => Integer (default: 30)
  #   :omission => String  (default: "…")
  #
  # Returns "" for nil/false text, the text itself when it already fits, and
  # "" when the omission alone is longer than +:length+.
  def truncate(text, *args)
    overrides = args.last.is_a?(Hash) ? args.pop : {}
    # merge returns a new hash, so the caller's options are never written to
    options = { :length => 30, :omission => "…" }.merge(overrides)

    # Positional arguments win, but only the ones actually given
    options[:length]   = args[0] if args[0]
    options[:omission] = args[1] if args[1]

    max_length = options[:length].to_i

    return "" unless text
    chars = mb_charify(text)

    # If we can return it straight away or rstrip it and return it, we do it here
    return text if chars.length <= max_length
    chars = rstrip_with_nbsp(chars)
    return chars.to_s if chars.length <= max_length

    omission = mb_charify(options[:omission].to_s)

    # Here we know we have to remove at least 1 word
    # 1. Get the first characters that leave room for the omission
    # 2. Remove the last word if it's a part
    # 3. Add omission
    room = max_length - omission.length
    return '' if room < 0

    head = rstrip_with_nbsp(chars[0...room] || "")

    # The slice ended inside a word when nothing was stripped from its end and
    # the character right after it is not whitespace.
    cut_mid_word = head.length >= room && !(chars[room] =~ WHITESPACE)
    if cut_mid_word
      head = rstrip_with_nbsp(head.gsub(/#{NOT_WHITESPACE}+\z/, ''))
    end

    (head + omission.to_s).to_s
  end

  # Returns a unicode compatible version of the string
  #
  # support any of:
  # * ruby 1.9+: the string itself is returned
  # * ruby 1.8:  the result of text.mb_chars, when the string responds to it
  #
  # Raises RuntimeError when neither is available.
  def mb_charify(text)
    if RUBY_VERSION >= '1.9'
      text
    elsif text.respond_to?(:mb_chars)
      text.mb_chars
    else
      raise "StringUtils: No unicode support for strings"
    end
  end

  private

  # Removes trailing whitespace, including NBSP, which String#rstrip leaves.
  def rstrip_with_nbsp(s)
    s.gsub(/#{WHITESPACE_MATCHER}+\z/, '')
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "string_utils/version"
|
4
|
+
|
5
|
+
# Gem specification for string_utils.
Gem::Specification.new do |s|
  s.name        = "string_utils"
  s.version     = StringUtils::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Gleb Mazovetskiy"]
  s.email       = ["glex.spb@gmail.com"]
  s.homepage    = "http://github.com/glebm"
  s.summary     = %q{Provides useful string utils like "truncate to word"}
  s.description = %q{Provides useful string utils like "truncate to word".
Compatible with ruby >= 1.8. Benefits from active_support if available on ruby < 1.9.

Tested with:

* ruby 1.8.7 (2010-04-19 patchlevel 253) [i686-linux], MBARI 0x8770, Ruby Enterprise Edition 2010.02
*
}

  s.rubyforge_project = "string_utils"

  # Runtime dependencies
  s.add_dependency "activesupport", "> 2"
  s.add_dependency "i18n"

  s.add_development_dependency "rspec", ">= 2.0.0"

  # Everything tracked by git is packaged.
  s.files         = `git ls-files`.split("\n")
  # Derived from s.files instead of a second git call with a "{test,spec,features}/*"
  # pattern: that pattern needs shell brace expansion, which is not available in
  # every /bin/sh, and the list then silently comes out empty.
  s.test_files    = s.files.grep(%r{\A(?:test|spec|features)/})
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require "string_utils"
|
3
|
+
|
4
|
+
# Examples for StringUtils.normalize_name
describe "StringUtils" do
  describe "#normalize_name" do

    it 'normalizes whitespace' do
      StringUtils.normalize_name("\302\240 Hello \302\240 World! ").should == "Hello World!"
    end

    it 'normalizes periods, commas and /' do
      StringUtils.normalize_name("Av . Valle , Tetuan.Aqui ,a").should == "Av. Valle, Tetuan. Aqui, a"

      # The "/" half of this example: every spacing variant ends up as " / "
      StringUtils.normalize_name("Gran Via/Avda.de Asturias").should == "Gran Via / Avda. de Asturias"
      StringUtils.normalize_name("a/ b").should == "a / b"
      StringUtils.normalize_name("a /b").should == "a / b"
      StringUtils.normalize_name("a//b").should == "a / b"
    end

    it 'removes trailing and leading [,.]' do
      StringUtils.normalize_name(".Here,").should == "Here"
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'string_utils'
|
3
|
+
|
4
|
+
# Examples for StringUtils.truncate
describe "StringUtils" do
  describe "#truncate" do

    it "does not cut words shorter than the truncate length" do
      tests = ["hello", "hello there this is a test nothing more"]
      tests.each do |test|
        StringUtils.truncate(test, test.length).should == test
      end
    end

    it "truncates even if the only word is longer than the truncate length" do
      StringUtils.truncate("hello", 3, "...").should == "..."
    end

    it "never goes above the length provided" do
      # Shorter than the omission itself: nothing fits
      (0..2).each do |length|
        StringUtils.truncate("ɦɛĺłø ŵőřļđ", length, "...").should == ""
      end

      # Room for the omission but not for the first word
      (3..7).each do |length|
        StringUtils.truncate("ɦɛĺłø ŵőřļđ", length, "...").should == "..."
      end

      # Room for the first word plus the omission
      (8..10).each do |length|
        StringUtils.truncate("ɦɛĺłø ŵőřļđ", length, "...").should == "ɦɛĺłø..."
      end

      # The whole text fits
      (11..15).each do |length|
        StringUtils.truncate("ɦɛĺłø ŵőřļđ", length, "...").should == "ɦɛĺłø ŵőřļđ"
      end
    end

    it "keeps the whole word" do
      StringUtils.truncate("ab cd", 2, '.').should == "."
    end

    it "does not cut character entities" do
      # Literal HTML entities: the 13-character input does not fit in 9, and the
      # second entity must be dropped whole rather than cut in the middle.
      StringUtils.truncate("&auml; &Auml;", 9, '..').should == "&auml;.."
    end

    it "handles empty strings" do
      StringUtils.truncate("", 0, "...").should == ""
      StringUtils.truncate("", 1, "...").should == ""
      StringUtils.truncate("", 30, "...").should == ""
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: string_utils
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Gleb Mazovetskiy
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-12-07 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: activesupport
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
version: "2"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: i18n
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: rspec
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 15
|
58
|
+
segments:
|
59
|
+
- 2
|
60
|
+
- 0
|
61
|
+
- 0
|
62
|
+
version: 2.0.0
|
63
|
+
type: :development
|
64
|
+
version_requirements: *id003
|
65
|
+
description: |
|
66
|
+
Provides useful string utils like "truncate to word".
|
67
|
+
Compatible with ruby >= 1.8. Benefits from active_support if available on ruby < 1.9.
|
68
|
+
|
69
|
+
Tested with:
|
70
|
+
|
71
|
+
* ruby 1.8.7 (2010-04-19 patchlevel 253) [i686-linux], MBARI 0x8770, Ruby Enterprise Edition 2010.02
|
72
|
+
*
|
73
|
+
|
74
|
+
email:
|
75
|
+
- glex.spb@gmail.com
|
76
|
+
executables: []
|
77
|
+
|
78
|
+
extensions: []
|
79
|
+
|
80
|
+
extra_rdoc_files: []
|
81
|
+
|
82
|
+
files:
|
83
|
+
- .gitignore
|
84
|
+
- Gemfile
|
85
|
+
- Gemfile.lock
|
86
|
+
- README
|
87
|
+
- Rakefile
|
88
|
+
- lib/string_utils.rb
|
89
|
+
- lib/string_utils/version.rb
|
90
|
+
- string_utils.gemspec
|
91
|
+
- test/normalize_name_spec.rb
|
92
|
+
- test/truncate_spec.rb
|
93
|
+
has_rdoc: true
|
94
|
+
homepage: http://github.com/glebm
|
95
|
+
licenses: []
|
96
|
+
|
97
|
+
post_install_message:
|
98
|
+
rdoc_options: []
|
99
|
+
|
100
|
+
require_paths:
|
101
|
+
- lib
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
hash: 3
|
108
|
+
segments:
|
109
|
+
- 0
|
110
|
+
version: "0"
|
111
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
|
+
none: false
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
hash: 3
|
117
|
+
segments:
|
118
|
+
- 0
|
119
|
+
version: "0"
|
120
|
+
requirements: []
|
121
|
+
|
122
|
+
rubyforge_project: string_utils
|
123
|
+
rubygems_version: 1.3.7
|
124
|
+
signing_key:
|
125
|
+
specification_version: 3
|
126
|
+
summary: Provides useful string utils like "truncate to word"
|
127
|
+
test_files: []
|
128
|
+
|