babosa 0.3.11 → 2.0.0.beta

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +5 -5
  2. data/Changelog.md +117 -17
  3. data/README.md +82 -119
  4. data/Rakefile +9 -8
  5. data/lib/babosa.rb +2 -21
  6. data/lib/babosa/identifier.rb +87 -124
  7. data/lib/babosa/transliterator/base.rb +59 -43
  8. data/lib/babosa/transliterator/bulgarian.rb +3 -2
  9. data/lib/babosa/transliterator/cyrillic.rb +5 -5
  10. data/lib/babosa/transliterator/danish.rb +3 -3
  11. data/lib/babosa/transliterator/german.rb +3 -2
  12. data/lib/babosa/transliterator/greek.rb +4 -3
  13. data/lib/babosa/transliterator/hindi.rb +138 -0
  14. data/lib/babosa/transliterator/latin.rb +5 -5
  15. data/lib/babosa/transliterator/macedonian.rb +3 -2
  16. data/lib/babosa/transliterator/norwegian.rb +3 -3
  17. data/lib/babosa/transliterator/romanian.rb +3 -2
  18. data/lib/babosa/transliterator/russian.rb +3 -2
  19. data/lib/babosa/transliterator/serbian.rb +29 -27
  20. data/lib/babosa/transliterator/spanish.rb +2 -2
  21. data/lib/babosa/transliterator/swedish.rb +3 -3
  22. data/lib/babosa/transliterator/turkish.rb +8 -0
  23. data/lib/babosa/transliterator/ukrainian.rb +23 -3
  24. data/lib/babosa/transliterator/vietnamese.rb +4 -3
  25. data/lib/babosa/version.rb +3 -1
  26. data/spec/identifier_spec.rb +157 -0
  27. data/spec/spec_helper.rb +15 -12
  28. data/spec/transliterators/base_spec.rb +7 -8
  29. data/spec/transliterators/bulgarian_spec.rb +4 -5
  30. data/spec/transliterators/danish_spec.rb +5 -6
  31. data/spec/transliterators/german_spec.rb +6 -7
  32. data/spec/transliterators/greek_spec.rb +7 -7
  33. data/spec/transliterators/hindi_spec.rb +17 -0
  34. data/spec/transliterators/latin_spec.rb +8 -0
  35. data/spec/transliterators/macedonian_spec.rb +3 -4
  36. data/spec/transliterators/norwegian_spec.rb +4 -4
  37. data/spec/transliterators/polish_spec.rb +12 -0
  38. data/spec/transliterators/romanian_spec.rb +5 -6
  39. data/spec/transliterators/russian_spec.rb +3 -4
  40. data/spec/transliterators/serbian_spec.rb +6 -7
  41. data/spec/transliterators/spanish_spec.rb +5 -6
  42. data/spec/transliterators/swedish_spec.rb +7 -7
  43. data/spec/transliterators/turkish_spec.rb +24 -0
  44. data/spec/transliterators/ukrainian_spec.rb +81 -3
  45. data/spec/transliterators/vietnamese_spec.rb +10 -10
  46. metadata +41 -46
  47. data/init.rb +0 -3
  48. data/lib/babosa/candidates.rb +0 -45
  49. data/lib/babosa/generator.rb +0 -24
  50. data/lib/babosa/utf8/active_support_proxy.rb +0 -20
  51. data/lib/babosa/utf8/dumb_proxy.rb +0 -42
  52. data/lib/babosa/utf8/java_proxy.rb +0 -22
  53. data/lib/babosa/utf8/mappings.rb +0 -193
  54. data/lib/babosa/utf8/proxy.rb +0 -118
  55. data/lib/babosa/utf8/unicode_proxy.rb +0 -21
  56. data/spec/babosa_spec.rb +0 -145
  57. data/spec/utf8_proxy_spec.rb +0 -48
@@ -1,4 +1,5 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
+
2
3
  module Babosa
3
4
  module Transliterator
4
5
  class Vietnamese < Latin
@@ -137,7 +138,7 @@ module Babosa
137
138
  "Ỹ" => "Y",
138
139
  "đ" => "d",
139
140
  "Đ" => "D"
140
- }
141
+ }.freeze
141
142
  end
142
143
  end
143
- end
144
+ end
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Babosa
2
4
  module Version
3
- STRING = "0.3.11"
5
+ STRING = "2.0.0.beta"
4
6
  end
5
7
  end
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ describe Babosa::Identifier do
6
+ it "should respond_to :empty?" do
7
+ expect("".to_slug).to respond_to(:empty?)
8
+ end
9
+
10
+ %w[approximate_ascii clean downcase word_chars normalize to_ascii upcase with_dashes].each do |method|
11
+ describe "##{method}" do
12
+ it "should work with invalid UTF-8 strings" do
13
+ expect { "\x93abc".to_slug.send method }.not_to raise_exception
14
+ end
15
+ end
16
+ end
17
+
18
+ describe "#word_chars" do
19
+ it "word_chars! should leave only letters and spaces" do
20
+ string = "a*$%^$@!@b$%^&*()*!c"
21
+ expect(string.to_slug.word_chars!).to match(/[a-z ]*/i)
22
+ end
23
+ end
24
+
25
+ describe "#transliterate" do
26
+ it "should transliterate to ascii" do
27
+ (0xC0..0x17E).to_a.each do |codepoint|
28
+ ss = [codepoint].pack("U*").to_slug
29
+ expect(ss.approximate_ascii!).to match(/[\x0-\x7f]/)
30
+ end
31
+ end
32
+
33
+ it "should transliterate uncomposed utf8" do
34
+ string = [117, 776].pack("U*") # "ü" as ASCII "u" plus COMBINING DIAERESIS
35
+ expect(string.to_slug.approximate_ascii).to eql("u")
36
+ end
37
+
38
+ it "should transliterate using multiple transliterators" do
39
+ string = "свободное režģis"
40
+ expect(string.to_slug.approximate_ascii(:latin, :russian)).to eql("svobodnoe rezgis")
41
+ end
42
+ end
43
+
44
+ describe "#downcase" do
45
+ it "should lowercase strings" do
46
+ expect("FELIZ AÑO".to_slug.downcase).to eql("feliz año")
47
+ end
48
+ end
49
+
50
+ describe "#upcase" do
51
+ it "should uppercase strings" do
52
+ expect("feliz año".to_slug.upcase).to eql("FELIZ AÑO")
53
+ end
54
+ end
55
+
56
+ describe "#normalize" do
57
+ it "should allow passing locale as key for :transliterate" do
58
+ expect("ö".to_slug.clean.normalize(transliterate: :german)).to eql("oe")
59
+ end
60
+
61
+ it "should replace whitespace with dashes" do
62
+ expect("a b".to_slug.clean.normalize).to eql("a-b")
63
+ end
64
+
65
+ it "should replace multiple spaces with 1 dash" do
66
+ expect("a b".to_slug.clean.normalize).to eql("a-b")
67
+ end
68
+
69
+ it "should replace multiple dashes with 1 dash" do
70
+ expect("male - female".to_slug.normalize).to eql("male-female")
71
+ end
72
+
73
+ it "should strip trailing space" do
74
+ expect("ab ".to_slug.normalize).to eql("ab")
75
+ end
76
+
77
+ it "should strip leading space" do
78
+ expect(" ab".to_slug.normalize).to eql("ab")
79
+ end
80
+
81
+ it "should strip trailing slashes" do
82
+ expect("ab-".to_slug.normalize).to eql("ab")
83
+ end
84
+
85
+ it "should strip leading slashes" do
86
+ expect("-ab".to_slug.normalize).to eql("ab")
87
+ end
88
+
89
+ it "should not modify valid name strings" do
90
+ expect("a-b-c-d".to_slug.normalize).to eql("a-b-c-d")
91
+ end
92
+
93
+ it "should not convert underscores" do
94
+ expect("hello_world".to_slug.normalize).to eql("hello_world")
95
+ end
96
+
97
+ it "should work with non roman chars" do
98
+ expect("検 索".to_slug.normalize).to eql("検-索")
99
+ end
100
+
101
+ context "with to_ascii option" do
102
+ it "should approximate and strip non ascii" do
103
+ ss = "カタカナ: katakana is über cool".to_slug
104
+ expect(ss.normalize(to_ascii: true)).to eql("katakana-is-uber-cool")
105
+ end
106
+ end
107
+ end
108
+
109
+ describe "#truncate_bytes" do
110
+ it "should by byte length" do
111
+ expect("üa".to_slug.truncate_bytes(2)).to eql("ü")
112
+ expect("üa".to_slug.truncate_bytes(1)).to eql("")
113
+ expect("üa".to_slug.truncate_bytes(100)).to eql("üa")
114
+ expect("üéøá".to_slug.truncate_bytes(3)).to eql("ü")
115
+ end
116
+ end
117
+
118
+ describe "#truncate" do
119
+ it "should truncate by char length" do
120
+ expect("üa".to_slug.truncate(2)).to eql("üa")
121
+ expect("üa".to_slug.truncate(1)).to eql("ü")
122
+ expect("üa".to_slug.truncate(100)).to eql("üa")
123
+ end
124
+ end
125
+
126
+ describe "#with_dashes" do
127
+ it "should not change byte size when replacing spaces" do
128
+ expect("".to_slug.with_dashes.bytesize).to eql(0)
129
+ expect(" ".to_slug.with_dashes.bytesize).to eql(1)
130
+ expect("-abc-".to_slug.with_dashes.bytesize).to eql(5)
131
+ expect(" abc ".to_slug.with_dashes.bytesize).to eql(5)
132
+ expect(" a bc ".to_slug.with_dashes.bytesize).to eql(7)
133
+ end
134
+ end
135
+
136
+ describe "#to_ruby_method" do
137
+ it "should get a string suitable for use as a ruby method" do
138
+ expect("¿¿¿hello... world???".to_slug.to_ruby_method).to eql("hello_world?")
139
+ expect("カタカナ: katakana is über cool".to_slug.to_ruby_method).to eql("katakana_is_uber_cool")
140
+ expect("カタカナ: katakana is über cool!".to_slug.to_ruby_method).to eql("katakana_is_uber_cool!")
141
+ expect("カタカナ: katakana is über cool".to_slug.to_ruby_method(allow_bangs: false)).to eql("katakana_is_uber_cool")
142
+ end
143
+
144
+ it "should optionally remove trailing punctuation" do
145
+ expect("¿¿¿hello... world???".to_slug.to_ruby_method(allow_bangs: false)).to eql("hello_world")
146
+ end
147
+
148
+ it "should raise an error when it would generate an impossible method name" do
149
+ # "1".to_identifier.to_ruby_method
150
+ expect { "1".to_identifier.to_ruby_method }.to raise_error(Babosa::Identifier::Error)
151
+ end
152
+
153
+ it "should raise Babosa::Error error when the string is nil" do
154
+ expect { "".to_slug.to_ruby_method }.to raise_error(Babosa::Identifier::Error)
155
+ end
156
+ end
157
+ end
@@ -1,25 +1,19 @@
1
- # encoding: utf-8
2
- $LOAD_PATH << File.expand_path("../lib", __FILE__)
3
- $LOAD_PATH.uniq!
1
+ # frozen_string_literal: true
4
2
 
5
3
  if ENV["COV"]
6
4
  require "simplecov"
7
5
  SimpleCov.start
8
6
  end
9
7
 
10
- # encoding: utf-8
11
- $KCODE = 'UTF8' if RUBY_VERSION < '1.9'
12
-
13
- require "rubygems"
8
+ require "bundler/setup"
14
9
  require "babosa"
15
- require "active_support"
16
10
 
17
11
  shared_examples_for "a latin transliterator" do
18
12
  let(:t) { described_class.instance }
19
13
 
20
14
  it "should transliterate latin characters" do
21
15
  string = (0xC0..0x17E).to_a.pack("U*")
22
- t.transliterate(string).should match(/[\x0-\x7f]/)
16
+ expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
23
17
  end
24
18
  end
25
19
 
@@ -28,7 +22,7 @@ shared_examples_for "a cyrillic transliterator" do
28
22
 
29
23
  it "should transliterate cyrillic characters" do
30
24
  string = "Славься, Отечество наше свободное"
31
- t.transliterate(string).should match(/[\x0-\x7f]/)
25
+ expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
32
26
  end
33
27
  end
34
28
 
@@ -37,6 +31,15 @@ shared_examples_for "a greek transliterator" do
37
31
 
38
32
  it "should transliterate greek characters" do
39
33
  string = "Γερμανία"
40
- t.transliterate(string).should match(/[\x0-\x7f]/)
34
+ expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
41
35
  end
42
- end
36
+ end
37
+
38
+ shared_examples_for "a hindi transliterator" do
39
+ let(:t) { described_class.instance }
40
+
41
+ it "should transliterate hindi characters" do
42
+ string = "आदित्य तापड़िया"
43
+ expect(t.transliterate(string)).to match(/[\x0-\x7f]/)
44
+ end
45
+ end
@@ -1,16 +1,15 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Base do
3
+ require "spec_helper"
5
4
 
6
- before { @t = Babosa::Transliterator::Base.instance }
5
+ describe Babosa::Transliterator::Base do
6
+ let(:t) { Babosa::Transliterator::Base.instance }
7
7
 
8
8
  it "should transliterate 'smart' quotes" do
9
- @t.transliterate("’").should eql("'")
9
+ expect(t.transliterate("’")).to eql("'")
10
10
  end
11
11
 
12
12
  it "should transliterate non-breaking spaces" do
13
- @t.transliterate("\xc2\xa0").should eql(" ")
13
+ expect(t.transliterate("\xc2\xa0")).to eql(" ")
14
14
  end
15
-
16
- end
15
+ end
@@ -1,8 +1,8 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Bulgarian do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Bulgarian do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a cyrillic transliterator"
8
8
 
@@ -14,7 +14,6 @@ describe Babosa::Transliterator::Bulgarian do
14
14
  "Щъркел" => "Shturkel",
15
15
  "полицай" => "policai"
16
16
  }
17
- examples.each {|k, v| t.transliterate(k).should eql(v)}
17
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
18
18
  end
19
-
20
19
  end
@@ -1,8 +1,8 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Danish do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Danish do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a latin transliterator"
8
8
 
@@ -11,7 +11,6 @@ describe Babosa::Transliterator::Danish do
11
11
  "Ærøskøbing" => "Aeroeskoebing",
12
12
  "Årslev" => "Aarslev"
13
13
  }
14
- examples.each {|k, v| t.transliterate(k).should eql(v)}
14
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
15
15
  end
16
-
17
- end
16
+ end
@@ -1,17 +1,16 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::German do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::German do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a latin transliterator"
8
8
 
9
9
  it "should transliterate Eszett" do
10
- t.transliterate("ß").should eql("ss")
10
+ expect(t.transliterate("ß")).to eql("ss")
11
11
  end
12
12
 
13
13
  it "should transliterate vowels with umlauts" do
14
- t.transliterate("üöä").should eql("ueoeae")
14
+ expect(t.transliterate("üöä")).to eql("ueoeae")
15
15
  end
16
-
17
- end
16
+ end
@@ -1,17 +1,17 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Greek do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Greek do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a greek transliterator"
8
8
 
9
9
  it "should transliterate various characters" do
10
10
  examples = {
11
- "Γερμανία" => "Germania",
12
- "Αυστρία" => "Aystria",
13
- "Ιταλία" => "Italia"
11
+ "Γερμανία" => "Germania",
12
+ "Αυστρία" => "Aystria",
13
+ "Ιταλία" => "Italia"
14
14
  }
15
- examples.each {|k, v| t.transliterate(k).should eql(v)}
15
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
16
16
  end
17
17
  end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ describe Babosa::Transliterator::Hindi do
6
+ let(:t) { described_class.instance }
7
+ it_behaves_like "a hindi transliterator"
8
+
9
+ it "should transliterate hindi characters" do
10
+ examples = {
11
+ "आदित्य" => "aadity",
12
+ "सबरीमाला करवाना पायसम" => "sbriimaalaa krvaanaa paaysm",
13
+ "सक्रांति आँख" => "skraanti aankh"
14
+ }
15
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
16
+ end
17
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ describe Babosa::Transliterator::Latin do
6
+ let(:t) { described_class.instance }
7
+ it_behaves_like "a latin transliterator"
8
+ end
@@ -1,9 +1,8 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Macedonian do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Macedonian do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a cyrillic transliterator"
8
-
9
8
  end
@@ -1,8 +1,8 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Norwegian do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Norwegian do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a latin transliterator"
8
8
 
@@ -13,6 +13,6 @@ describe Babosa::Transliterator::Norwegian do
13
13
  "Åre" => "Aare",
14
14
  "Håkon" => "Haakon"
15
15
  }
16
- examples.each {|k, v| t.transliterate(k).should eql(v)}
16
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
17
17
  end
18
18
  end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ describe Babosa::Transliterator::Romanian do
6
+ let(:t) { described_class.instance }
7
+ it_behaves_like "a latin transliterator"
8
+
9
+ it "should transliterate various characters" do
10
+ expect(t.transliterate("ĄąĆćĘęŁłŃńÓóŚśŹźŻż")).to eql("AaCcEeLlNnOoSsZzZz")
11
+ end
12
+ end
@@ -1,8 +1,8 @@
1
- # encoding: utf-8
2
- require File.expand_path("../../spec_helper", __FILE__)
1
+ # frozen_string_literal: true
3
2
 
4
- describe Babosa::Transliterator::Romanian do
3
+ require "spec_helper"
5
4
 
5
+ describe Babosa::Transliterator::Romanian do
6
6
  let(:t) { described_class.instance }
7
7
  it_behaves_like "a latin transliterator"
8
8
 
@@ -13,7 +13,6 @@ describe Babosa::Transliterator::Romanian do
13
13
  "Țară" => "Tara",
14
14
  "Șanț" => "Sant"
15
15
  }
16
- examples.each {|k, v| t.transliterate(k).should eql(v)}
16
+ examples.each { |k, v| expect(t.transliterate(k)).to eql(v) }
17
17
  end
18
-
19
- end
18
+ end