string_utils 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +30 -0
- data/README +8 -0
- data/Rakefile +9 -0
- data/lib/string_utils.rb +134 -0
- data/lib/string_utils/version.rb +3 -0
- data/string_utils.gemspec +34 -0
- data/test/normalize_name_spec.rb +19 -0
- data/test/truncate_spec.rb +50 -0
- metadata +128 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
string_utils (1.0.0)
|
5
|
+
activesupport (> 2)
|
6
|
+
i18n
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: http://rubygems.org/
|
10
|
+
specs:
|
11
|
+
activesupport (3.0.3)
|
12
|
+
diff-lcs (1.1.2)
|
13
|
+
i18n (0.5.0)
|
14
|
+
rspec (2.2.0)
|
15
|
+
rspec-core (~> 2.2)
|
16
|
+
rspec-expectations (~> 2.2)
|
17
|
+
rspec-mocks (~> 2.2)
|
18
|
+
rspec-core (2.2.1)
|
19
|
+
rspec-expectations (2.2.0)
|
20
|
+
diff-lcs (~> 1.1.2)
|
21
|
+
rspec-mocks (2.2.0)
|
22
|
+
|
23
|
+
PLATFORMS
|
24
|
+
ruby
|
25
|
+
|
26
|
+
DEPENDENCIES
|
27
|
+
activesupport (> 2)
|
28
|
+
i18n
|
29
|
+
rspec (>= 2.0.0)
|
30
|
+
string_utils!
|
data/README
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
StringUtils gem repo
|
2
|
+
|
3
|
+
StringUtils is a library that provides various string manipulation helpers.
|
4
|
+
|
5
|
+
Example usage:
|
6
|
+
|
7
|
+
* StringUtils.truncate("hello world", 10, "...") #=> "hello..."
|
8
|
+
* StringUtils.normalize_name "\302\240 Gran Via/Avda.de Asturias " #=> "Gran Via / Avda. de Asturias"
|
data/Rakefile
ADDED
data/lib/string_utils.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
if RUBY_VERSION < '1.9'
|
3
|
+
$KCODE = "UTF8" if $KCODE == "NONE"
|
4
|
+
require 'active_support/core_ext/string/multibyte'
|
5
|
+
end
|
6
|
+
|
7
|
+
|
8
|
+
# StringUtils is a small library of string manipulation helpers that treat
# the no-break space (U+00A0) as whitespace.
#
# Example usage:
#   StringUtils.truncate("hello world", 10, "...")
#   #=> "hello..."
#   StringUtils.normalize_name("\302\240 Gran Via/Avda.de Asturias ")
#   #=> "Gran Via / Avda. de Asturias"
module StringUtils
  extend self

  # UTF-8 byte sequence of U+00A0 NO-BREAK SPACE.
  NBSP = "\302\240".freeze

  # Regexp source fragments. The backslash is doubled on purpose: inside a
  # double-quoted Ruby string "\s" is a literal space character, which would
  # leave tabs, carriage returns and newlines unmatched.
  WHITESPACE_MATCHER = "(?:\\s|#{NBSP})".freeze
  NOT_WHITESPACE     = "[^\\s#{NBSP}]".freeze

  # A single whitespace character / a run of whitespace characters.
  WHITESPACE  = /#{WHITESPACE_MATCHER}/
  WHITESPACES = /#{WHITESPACE_MATCHER}+/

  # Normalizes whitespace and punctuation spacing in a name.
  #
  #   "a , a" => "a, a"
  #   "a ,a"  => "a, a"
  #   "a,a"   => "a, a"
  #   "a/b"   => "a / b", "a/ b" => "a / b", "a /b" => "a / b"
  #
  # One leading and one trailing "." or "," is removed.
  #
  # Arguments:
  #   value   => the string to normalize; nil yields ""
  #   options => {:titleize => true (default false)}
  #              :titleize calls String#titleize, which this module does not
  #              define; the caller must have it loaded (presumably via
  #              ActiveSupport inflections -- confirm).
  #
  # Returns a new String; the argument itself is never modified.
  def normalize_name(value, options = {})
    return '' if value.nil?

    # Work on a copy: on Ruby >= 1.9 mb_charify returns its argument, so the
    # in-place edits below would otherwise change (or fail on a frozen)
    # caller-owned string.
    value = mb_charify(value).dup

    # Collapse every whitespace run (including newlines and NBSP) to one space
    value.gsub!(WHITESPACES, ' ')
    value.strip!

    # Remove leading and trailing [.,], then any space that removal exposed
    value.sub!(/\A[.,]/, '')
    value.sub!(/[.,]\z/, '')
    value.strip!

    # "a ,a" => "a, a"; "a,a" => "a, a"; "a , a" => "a, a"
    value.gsub!(/#{WHITESPACE_MATCHER}([,.])/, '\1')
    value.gsub!(/([,.])(#{NOT_WHITESPACE})/, '\1 \2')

    # "//" => "/"
    value.gsub!(/\/+/, '/')

    # "a/b" => "a / b", "a/ b" => "a / b", "a /b" => "a / b"
    value.gsub!(/(#{NOT_WHITESPACE})\//, '\1 /')
    value.gsub!(/\/(#{NOT_WHITESPACE})/, '/ \1')

    if options[:titleize]
      value = value.titleize
      # The match already includes its surrounding whitespace, so lowercasing
      # it in place keeps the spacing single.
      value.gsub!(/#{WHITESPACE_MATCHER}(Of|And|A)#{WHITESPACE_MATCHER}/) { |m| m.downcase }
    end
    value.to_s
  end

  # Truncates +text+ to at most :length characters (omission included)
  # without cutting a word in the middle.
  #
  # Calling conventions:
  #   truncate(text, length, omission)
  #   truncate(text, :length => 30, :omission => "…")
  #   truncate(text, length, :omission => "…")
  # Positional values win over the same key in the options hash.
  #
  # Arguments:
  #   :length   => Integer (default: 30)
  #   :omission => String  (default: "…")
  #
  # Returns "" when +text+ is nil/false or when the omission alone is longer
  # than :length; returns +text+ itself when it already fits.
  def truncate(text, *args)
    options = args.last.is_a?(Hash) ? args.pop : {}

    # Collect positional values separately so that a missing positional
    # omission does not overwrite one supplied in the hash, and so the
    # caller's hash is never modified.
    positional = {}
    positional[:length]   = args[0] unless args[0].nil?
    positional[:omission] = args[1] unless args[1].nil?

    options = { :length => 30, :omission => "…" }.merge(options).merge(positional)
    limit = options[:length].to_i
    omission_text = options[:omission].to_s

    return "" if !text
    chars = mb_charify(text)

    # If we can return it straight away or rstrip it and return it, we do it here
    if chars.length <= limit
      return text
    elsif (chars = rstrip_with_nbsp(chars)).length <= limit
      return chars.to_s
    end

    omission = mb_charify(omission_text)

    # Here we know we have to remove at least 1 word:
    # 1. Get the first l characters
    # 2. Remove the last word if it's a part
    # 3. Add omission
    length_wo_omission = limit - omission.length

    return '' if length_wo_omission < 0

    result = rstrip_with_nbsp(chars[0...length_wo_omission] || "")

    # Remove the last word unless the cut already fell on a word boundary:
    # either the next character is whitespace, or rstrip shortened the slice.
    unless chars[length_wo_omission] =~ WHITESPACE || result.length < length_wo_omission
      last_word = result.split(WHITESPACES).last
      drop = last_word ? last_word.length : 0
      result = rstrip_with_nbsp(result[0...(result.length - drop)])
    end

    (result + omission_text).to_s
  end

  # Returns a unicode compatible version of the string.
  #
  # On Ruby >= 1.9 this is +text+ itself; otherwise it is text.mb_chars when
  # the object responds to it. Raises RuntimeError when neither applies.
  def mb_charify(text)
    if RUBY_VERSION >= '1.9'
      text
    elsif text.respond_to?(:mb_chars)
      text.mb_chars
    else
      raise "StringUtils: No unicode support for strings"
    end
  end

  private

  # Returns a copy of +s+ without trailing whitespace, NBSP included.
  def rstrip_with_nbsp(s)
    s.gsub(/#{WHITESPACE_MATCHER}+\z/, '')
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "string_utils/version"
|
4
|
+
|
5
|
+
# Gem specification for string_utils. File lists are taken from git, so the
# gem must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "string_utils"
  s.version     = StringUtils::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Gleb Mazovetskiy"]
  s.email       = ["glex.spb@gmail.com"]
  s.homepage    = "http://github.com/glebm"
  s.summary     = %q{Provides useful string utils like "truncate to word"}
  s.description = %q{Provides useful string utils like "truncate to word".
Compatible with ruby >= 1.8. Benefits from active_support if available on ruby < 1.9.

Tested with:

* ruby 1.8.7 (2010-04-19 patchlevel 253) [i686-linux], MBARI 0x8770, Ruby Enterprise Edition 2010.02
*
}

  s.rubyforge_project = "string_utils"

  s.add_dependency "activesupport", "> 2"
  s.add_dependency "i18n"

  s.add_development_dependency "rspec", ">= 2.0.0"

  s.files         = `git ls-files`.split("\n")
  # Pick the test files out of the file list in Ruby rather than relying on
  # the shell to expand "{test,spec,features}/*": the packaged metadata for
  # this release lists no test files even though test/*.rb is shipped
  # (presumably the shell running the backticks did not brace-expand).
  s.test_files    = s.files.grep(%r{\A(?:test|spec|features)/})
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require "string_utils"
|
3
|
+
|
4
|
+
# Specs for StringUtils.normalize_name: whitespace collapsing (NBSP
# included), spacing around ".", "," and "/", and removal of edge punctuation.
describe "StringUtils" do
  describe "#normalize_name" do

    it 'normalizes whitespace' do
      StringUtils.normalize_name("\302\240 Hello \302\240 World! ").should == "Hello World!"
    end

    it 'normalizes periods, commas and /' do
      StringUtils.normalize_name("Av . Valle , Tetuan.Aqui ,a").should == "Av. Valle, Tetuan. Aqui, a"

      # Every spacing variant around a slash, and a doubled slash, ends up as " / "
      ["a/b", "a/ b", "a /b", "a//b"].each do |input|
        StringUtils.normalize_name(input).should == "a / b"
      end
    end

    it 'removes trailing and leading [,.]' do
      StringUtils.normalize_name(".Here,").should == "Here"
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'string_utils'
|
3
|
+
|
4
|
+
# Specs for StringUtils.truncate: the result never exceeds the requested
# length (omission included) and words are dropped whole, never cut.
describe "StringUtils" do
  describe "#truncate" do

    it "does not cut words shorter than the truncate length" do
      tests = ["hello", "hello there this is a test nothing more"]
      tests.each do |test|
        StringUtils.truncate(test, test.length).should == test
      end
    end

    it "truncates even if the only word is longer than the truncate length" do
      StringUtils.truncate("hello", 3, "...").should == "..."
    end

    it "never goes above the length provided" do
      # Shorter than the omission itself: nothing fits
      (0..2).each do |length|
        StringUtils.truncate("ɦɛĺłø ŵőřļđ", length, "...").should == ""
      end

      # Room for the omission but not for the first word
      (3..7).each do |length|
        StringUtils.truncate("ɦɛĺłø ŵőřļđ", length, "...").should == "..."
      end

      # Room for the first word plus the omission
      (8..10).each do |length|
        StringUtils.truncate("ɦɛĺłø ŵőřļđ", length, "...").should == "ɦɛĺłø..."
      end

      # The whole string fits
      (11..15).each do |length|
        StringUtils.truncate("ɦɛĺłø ŵőřļđ", length, "...").should == "ɦɛĺłø ŵőřļđ"
      end
    end

    it "keeps the whole word" do
      StringUtils.truncate("ab cd", 2, '.').should == "."
    end

    it "does not cut character entities" do
      # The input must contain literal HTML entities (13 characters) so that
      # truncation actually happens; a 3-character string would be returned
      # unchanged at length 9.
      StringUtils.truncate("&auml; &Auml;", 9, '..').should == "&auml;.."
    end

    it "handles empty strings" do
      StringUtils.truncate("", 0, "...").should == ""
      StringUtils.truncate("", 1, "...").should == ""
      StringUtils.truncate("", 30, "...").should == ""
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: string_utils
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Gleb Mazovetskiy
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-12-07 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: activesupport
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
version: "2"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: i18n
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: rspec
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 15
|
58
|
+
segments:
|
59
|
+
- 2
|
60
|
+
- 0
|
61
|
+
- 0
|
62
|
+
version: 2.0.0
|
63
|
+
type: :development
|
64
|
+
version_requirements: *id003
|
65
|
+
description: |
|
66
|
+
Provides useful string utils like "truncate to word".
|
67
|
+
Compatible with ruby >= 1.8. Benefits from active_support if available on ruby < 1.9.
|
68
|
+
|
69
|
+
Tested with:
|
70
|
+
|
71
|
+
* ruby 1.8.7 (2010-04-19 patchlevel 253) [i686-linux], MBARI 0x8770, Ruby Enterprise Edition 2010.02
|
72
|
+
*
|
73
|
+
|
74
|
+
email:
|
75
|
+
- glex.spb@gmail.com
|
76
|
+
executables: []
|
77
|
+
|
78
|
+
extensions: []
|
79
|
+
|
80
|
+
extra_rdoc_files: []
|
81
|
+
|
82
|
+
files:
|
83
|
+
- .gitignore
|
84
|
+
- Gemfile
|
85
|
+
- Gemfile.lock
|
86
|
+
- README
|
87
|
+
- Rakefile
|
88
|
+
- lib/string_utils.rb
|
89
|
+
- lib/string_utils/version.rb
|
90
|
+
- string_utils.gemspec
|
91
|
+
- test/normalize_name_spec.rb
|
92
|
+
- test/truncate_spec.rb
|
93
|
+
has_rdoc: true
|
94
|
+
homepage: http://github.com/glebm
|
95
|
+
licenses: []
|
96
|
+
|
97
|
+
post_install_message:
|
98
|
+
rdoc_options: []
|
99
|
+
|
100
|
+
require_paths:
|
101
|
+
- lib
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
hash: 3
|
108
|
+
segments:
|
109
|
+
- 0
|
110
|
+
version: "0"
|
111
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
|
+
none: false
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
hash: 3
|
117
|
+
segments:
|
118
|
+
- 0
|
119
|
+
version: "0"
|
120
|
+
requirements: []
|
121
|
+
|
122
|
+
rubyforge_project: string_utils
|
123
|
+
rubygems_version: 1.3.7
|
124
|
+
signing_key:
|
125
|
+
specification_version: 3
|
126
|
+
summary: Provides useful string utils like "truncate to word"
|
127
|
+
test_files: []
|
128
|
+
|