unaccent 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +23 -19
- data/lib/unaccent/accentmap.rb +2 -0
- data/lib/unaccent/string.rb +2 -12
- data/lib/unaccent/version.rb +3 -1
- data/lib/unaccent.rb +5 -60
- metadata +4 -13
- data/.github/workflows/build.yml +0 -36
- data/.gitignore +0 -9
- data/.ruby-version +0 -1
- data/Gemfile +0 -4
- data/Rakefile +0 -8
- data/bin/benchmark +0 -81
- data/bin/console +0 -14
- data/bin/setup +0 -8
- data/unaccent.gemspec +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 850131686c6638f3c48c8441ab13c2b4a6220b752aaaab56e89fe19a274715fc
|
4
|
+
data.tar.gz: 2a026d497598b7303b6ee54b0d9f46b10d7015fff20ed2ad80b060448f638b42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc135100fc86657bd4538ce6a9081a8e02e279d4903b52361b99b88d5d517a6247c08b2590abdbf726e3e27d4c77afb9036bdc2163cc1d1afc2ff541fafb895a
|
7
|
+
data.tar.gz: a52a5e96a933058c774dc4e35b5a12f9982597cdf3511a6e3db9c6d299f07e337e24ad5455e3cf5e2f33caf270ec0e4426230ef589ec5d022216724b0fb52a87
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Unaccent
|
2
2
|
|
3
|
-
Ruby gem to replace a string's accent characters with
|
3
|
+
Ruby gem to replace a string's accent characters with unaccented characters. Based on [SixArm Ruby Unaccent](https://github.com/SixArm/sixarm_ruby_unaccent).
|
4
4
|
|
5
5
|
[![Gem Version](https://badge.fury.io/rb/unaccent.svg)](https://badge.fury.io/rb/unaccent)
|
6
6
|
[![Build](https://github.com/hardpixel/unaccent/actions/workflows/build.yml/badge.svg)](https://github.com/hardpixel/unaccent/actions/workflows/build.yml)
|
@@ -39,24 +39,28 @@ require 'unaccent/string'
|
|
39
39
|
## Benchmark
|
40
40
|
|
41
41
|
```
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
42
|
+
Warming up --------------------------------------
|
43
|
+
unaccent 3.259k i/100ms
|
44
|
+
sixarm 838.000 i/100ms
|
45
|
+
Calculating -------------------------------------
|
46
|
+
unaccent 32.573k (± 2.1%) i/s - 162.950k in 5.004780s
|
47
|
+
sixarm 8.025k (± 4.6%) i/s - 40.224k in 5.023339s
|
48
|
+
|
49
|
+
Comparison:
|
50
|
+
unaccent: 32573.1 i/s
|
51
|
+
sixarm: 8024.7 i/s - 4.06x (± 0.00) slower
|
52
|
+
|
53
|
+
Calculating -------------------------------------
|
54
|
+
unaccent 5.947k memsize ( 0.000 retained)
|
55
|
+
76.000 objects ( 0.000 retained)
|
56
|
+
42.000 strings ( 0.000 retained)
|
57
|
+
sixarm 29.979k memsize ( 0.000 retained)
|
58
|
+
633.000 objects ( 0.000 retained)
|
59
|
+
50.000 strings ( 0.000 retained)
|
60
|
+
|
61
|
+
Comparison:
|
62
|
+
unaccent: 5947 allocated
|
63
|
+
sixarm: 29979 allocated - 5.04x more
|
60
64
|
```
|
61
65
|
|
62
66
|
## Development
|
data/lib/unaccent/accentmap.rb
CHANGED
data/lib/unaccent/string.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'unaccent'
|
2
4
|
|
3
5
|
module Unaccent
|
@@ -5,18 +7,6 @@ module Unaccent
|
|
5
7
|
def unaccent
|
6
8
|
Unaccent.unaccent(self)
|
7
9
|
end
|
8
|
-
|
9
|
-
def unaccent_via_gsub
|
10
|
-
Unaccent.via_gsub(self)
|
11
|
-
end
|
12
|
-
|
13
|
-
def unaccent_via_each_char
|
14
|
-
Unaccent.via_each_char(self)
|
15
|
-
end
|
16
|
-
|
17
|
-
def unaccent_via_split_map
|
18
|
-
Unaccent.via_split_map(self)
|
19
|
-
end
|
20
10
|
end
|
21
11
|
end
|
22
12
|
|
data/lib/unaccent/version.rb
CHANGED
data/lib/unaccent.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'unaccent/version'
|
3
4
|
|
4
5
|
module Unaccent
|
6
|
+
autoload :ACCENTMAP, 'unaccent/accentmap'
|
7
|
+
|
5
8
|
class << self
|
6
9
|
# Replace a string's accented characters with unaccented characters.
|
7
10
|
#
|
@@ -12,67 +15,9 @@ module Unaccent
|
|
12
15
|
# @return [String] a string that has no accents
|
13
16
|
|
14
17
|
def unaccent(str)
|
15
|
-
via_gsub(str)
|
16
|
-
end
|
17
|
-
|
18
|
-
# Replace a string's accented characters with unaccented characters,
|
19
|
-
# by using string `#gsub` to replace non-ascii characters.
|
20
|
-
#
|
21
|
-
# @example
|
22
|
-
# str = 'Å Ç ß'
|
23
|
-
# Unaccent.via_gsub(str) = > 'AA C ss'
|
24
|
-
#
|
25
|
-
# @return [String] a string that has no accents
|
26
|
-
|
27
|
-
def via_gsub(str)
|
28
|
-
return str if str.ascii_only?
|
29
|
-
|
30
|
-
str.gsub(/[^[:ascii:]]/) { |c| ACCENTMAP.fetch(c, c) }
|
31
|
-
end
|
32
|
-
|
33
|
-
# Replace a string's accented characters with unaccented characters,
|
34
|
-
# by using string `#scan` to iterate on characters.
|
35
|
-
#
|
36
|
-
# @example
|
37
|
-
# str = 'Å Ç ß'
|
38
|
-
# Unaccent.via_scan(str) = > 'AA C ss'
|
39
|
-
#
|
40
|
-
# @return [String] a string that has no accents
|
41
|
-
|
42
|
-
def via_scan(str)
|
43
|
-
return str if str.ascii_only?
|
44
|
-
|
45
|
-
res = ''; str.scan(/./) { |c| res << ACCENTMAP.fetch(c, c) }; res
|
46
|
-
end
|
47
|
-
|
48
|
-
# Replace a string's accented characters with unaccented characters,
|
49
|
-
# by using string `#each_char` to iterate on characters.
|
50
|
-
#
|
51
|
-
# @example
|
52
|
-
# str = 'Å Ç ß'
|
53
|
-
# Unaccent.via_each_char(str) = > 'AA C ss'
|
54
|
-
#
|
55
|
-
# @return [String] a string that has no accents
|
56
|
-
|
57
|
-
def via_each_char(str)
|
58
|
-
return str if str.ascii_only?
|
59
|
-
|
60
|
-
res = ''; str.each_char { |c| res << ACCENTMAP.fetch(c, c) }; res
|
61
|
-
end
|
62
|
-
|
63
|
-
# Replace a string's accented characters with unaccented characters,
|
64
|
-
# by using string `#split` and `#map` to iterate on characters.
|
65
|
-
#
|
66
|
-
# @example
|
67
|
-
# str = 'Å Ç ß'
|
68
|
-
# Unaccent.via_split_map(str) = > 'AA C ss'
|
69
|
-
#
|
70
|
-
# @return [String] a string that has no accents
|
71
|
-
|
72
|
-
def via_split_map(str)
|
73
18
|
return str if str.ascii_only?
|
74
19
|
|
75
|
-
str.
|
20
|
+
str.gsub(/[^[:ascii:]]/) { |char| ACCENTMAP.fetch(char, char) }
|
76
21
|
end
|
77
22
|
end
|
78
23
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unaccent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonian Guveli
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -59,22 +59,13 @@ executables: []
|
|
59
59
|
extensions: []
|
60
60
|
extra_rdoc_files: []
|
61
61
|
files:
|
62
|
-
- ".github/workflows/build.yml"
|
63
|
-
- ".gitignore"
|
64
|
-
- ".ruby-version"
|
65
62
|
- ".yardopts"
|
66
|
-
- Gemfile
|
67
63
|
- LICENSE.txt
|
68
64
|
- README.md
|
69
|
-
- Rakefile
|
70
|
-
- bin/benchmark
|
71
|
-
- bin/console
|
72
|
-
- bin/setup
|
73
65
|
- lib/unaccent.rb
|
74
66
|
- lib/unaccent/accentmap.rb
|
75
67
|
- lib/unaccent/string.rb
|
76
68
|
- lib/unaccent/version.rb
|
77
|
-
- unaccent.gemspec
|
78
69
|
homepage: https://github.com/hardpixel/unaccent
|
79
70
|
licenses:
|
80
71
|
- MIT
|
@@ -87,14 +78,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
87
78
|
requirements:
|
88
79
|
- - ">="
|
89
80
|
- !ruby/object:Gem::Version
|
90
|
-
version: '
|
81
|
+
version: '2.6'
|
91
82
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
83
|
requirements:
|
93
84
|
- - ">="
|
94
85
|
- !ruby/object:Gem::Version
|
95
86
|
version: '0'
|
96
87
|
requirements: []
|
97
|
-
rubygems_version: 3.3.
|
88
|
+
rubygems_version: 3.3.14
|
98
89
|
signing_key:
|
99
90
|
specification_version: 4
|
100
91
|
summary: Replace accented characters with unaccented characters
|
data/.github/workflows/build.yml
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
---
|
2
|
-
name: Build
|
3
|
-
|
4
|
-
on:
|
5
|
-
push:
|
6
|
-
branches: [master]
|
7
|
-
paths-ignore:
|
8
|
-
- 'README.md'
|
9
|
-
- 'LICENSE.txt'
|
10
|
-
- 'bin/**'
|
11
|
-
pull_request:
|
12
|
-
branches: [master]
|
13
|
-
|
14
|
-
permissions:
|
15
|
-
contents: read
|
16
|
-
|
17
|
-
jobs:
|
18
|
-
build:
|
19
|
-
strategy:
|
20
|
-
matrix:
|
21
|
-
os: [ubuntu-latest]
|
22
|
-
ruby: ['2.7', '3.1']
|
23
|
-
runs-on: ${{ matrix.os }}
|
24
|
-
|
25
|
-
steps:
|
26
|
-
- uses: actions/checkout@v3
|
27
|
-
|
28
|
-
- name: Set up Ruby
|
29
|
-
uses: ruby/setup-ruby@v1
|
30
|
-
with:
|
31
|
-
ruby-version: ${{ matrix.ruby }}
|
32
|
-
bundler-cache: true
|
33
|
-
|
34
|
-
- name: Run tests
|
35
|
-
run: |
|
36
|
-
bundle exec rake test
|
data/.gitignore
DELETED
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
3.1.2
|
data/Gemfile
DELETED
data/Rakefile
DELETED
data/bin/benchmark
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'bundler/inline'
|
4
|
-
require 'tempfile'
|
5
|
-
|
6
|
-
gemfile true, quiet: true do
|
7
|
-
source 'https://rubygems.org'
|
8
|
-
gemspec
|
9
|
-
|
10
|
-
gem 'benchmark-ips'
|
11
|
-
gem 'benchmark-memory'
|
12
|
-
gem 'sixarm_ruby_unaccent'
|
13
|
-
end
|
14
|
-
|
15
|
-
STRINGS = [
|
16
|
-
'String without accents in english',
|
17
|
-
'Streng med aksenter på norsk',
|
18
|
-
'Řetězec s akcenty v češtině',
|
19
|
-
'Chaîne avec accents en français',
|
20
|
-
'Cuerda con acentos en español',
|
21
|
-
'Corda amb accents en català',
|
22
|
-
'Stīga ar akcentiem latviešu valodā',
|
23
|
-
'Türkçe aksanlı dize',
|
24
|
-
'Varg me theks në shqip',
|
25
|
-
'Κείμενο με τόνους στα ελληνικά'
|
26
|
-
]
|
27
|
-
|
28
|
-
reports = lambda do |x|
|
29
|
-
x.report('gsub') do
|
30
|
-
STRINGS.each do |str|
|
31
|
-
Unaccent.via_gsub(str)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
x.report('scan') do
|
36
|
-
STRINGS.each do |str|
|
37
|
-
Unaccent.via_scan(str)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
x.report('each_char') do
|
42
|
-
STRINGS.each do |str|
|
43
|
-
Unaccent.via_each_char(str)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
x.report('split_map') do
|
48
|
-
STRINGS.each do |str|
|
49
|
-
Unaccent.via_split_map(str)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
|
54
|
-
x.report('scan (sixarm)') do
|
55
|
-
STRINGS.each do |str|
|
56
|
-
str.unaccent_via_scan
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
x.report('each_char (sixarm)') do
|
61
|
-
STRINGS.each do |str|
|
62
|
-
str.unaccent_via_each_char
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
x.report('split_map (sixarm)') do
|
67
|
-
STRINGS.each do |str|
|
68
|
-
str.unaccent_via_split_map
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
Benchmark.ips do |x|
|
74
|
-
reports.call(x)
|
75
|
-
x.compare!
|
76
|
-
end
|
77
|
-
|
78
|
-
Benchmark.memory do |x|
|
79
|
-
reports.call(x)
|
80
|
-
x.compare!
|
81
|
-
end
|
data/bin/console
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'bundler/setup'
|
4
|
-
require 'unaccent'
|
5
|
-
|
6
|
-
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
-
# with your gem easier. You can also use a different console, if you like.
|
8
|
-
|
9
|
-
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
# require 'pry'
|
11
|
-
# Pry.start
|
12
|
-
|
13
|
-
require 'irb'
|
14
|
-
IRB.start(__FILE__)
|
data/bin/setup
DELETED
data/unaccent.gemspec
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
require_relative 'lib/unaccent/version'
|
2
|
-
|
3
|
-
Gem::Specification.new do |spec|
|
4
|
-
spec.name = 'unaccent'
|
5
|
-
spec.version = Unaccent::VERSION
|
6
|
-
spec.authors = ['Jonian Guveli']
|
7
|
-
spec.email = ['jonian@hardpixel.eu']
|
8
|
-
|
9
|
-
spec.summary = 'Replace accented characters with unaccented characters'
|
10
|
-
spec.description = 'Replace accented characters with unaccented characters.'
|
11
|
-
spec.homepage = 'https://github.com/hardpixel/unaccent'
|
12
|
-
spec.license = 'MIT'
|
13
|
-
|
14
|
-
# Specify which files should be added to the gem when it is released.
|
15
|
-
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
16
|
-
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
17
|
-
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
|
-
end
|
19
|
-
|
20
|
-
spec.require_paths = ['lib']
|
21
|
-
|
22
|
-
spec.add_development_dependency 'bundler', '~> 2.0'
|
23
|
-
spec.add_development_dependency 'minitest', '~> 5.0'
|
24
|
-
spec.add_development_dependency 'rake', '~> 13.0'
|
25
|
-
end
|