unaccent 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +23 -19
- data/lib/unaccent/accentmap.rb +2 -0
- data/lib/unaccent/string.rb +2 -12
- data/lib/unaccent/version.rb +3 -1
- data/lib/unaccent.rb +5 -60
- metadata +4 -13
- data/.github/workflows/build.yml +0 -36
- data/.gitignore +0 -9
- data/.ruby-version +0 -1
- data/Gemfile +0 -4
- data/Rakefile +0 -8
- data/bin/benchmark +0 -81
- data/bin/console +0 -14
- data/bin/setup +0 -8
- data/unaccent.gemspec +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 850131686c6638f3c48c8441ab13c2b4a6220b752aaaab56e89fe19a274715fc
|
4
|
+
data.tar.gz: 2a026d497598b7303b6ee54b0d9f46b10d7015fff20ed2ad80b060448f638b42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc135100fc86657bd4538ce6a9081a8e02e279d4903b52361b99b88d5d517a6247c08b2590abdbf726e3e27d4c77afb9036bdc2163cc1d1afc2ff541fafb895a
|
7
|
+
data.tar.gz: a52a5e96a933058c774dc4e35b5a12f9982597cdf3511a6e3db9c6d299f07e337e24ad5455e3cf5e2f33caf270ec0e4426230ef589ec5d022216724b0fb52a87
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Unaccent
|
2
2
|
|
3
|
-
Ruby gem to replace a string's accent characters with
|
3
|
+
Ruby gem to replace a string's accent characters with unaccented characters. Based on [SixArm Ruby Unaccent](https://github.com/SixArm/sixarm_ruby_unaccent).
|
4
4
|
|
5
5
|
[Gem Version](https://badge.fury.io/rb/unaccent)
|
6
6
|
[Build](https://github.com/hardpixel/unaccent/actions/workflows/build.yml)
|
@@ -39,24 +39,28 @@ require 'unaccent/string'
|
|
39
39
|
## Benchmark
|
40
40
|
|
41
41
|
```
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
42
|
+
Warming up --------------------------------------
|
43
|
+
unaccent 3.259k i/100ms
|
44
|
+
sixarm 838.000 i/100ms
|
45
|
+
Calculating -------------------------------------
|
46
|
+
unaccent 32.573k (± 2.1%) i/s - 162.950k in 5.004780s
|
47
|
+
sixarm 8.025k (± 4.6%) i/s - 40.224k in 5.023339s
|
48
|
+
|
49
|
+
Comparison:
|
50
|
+
unaccent: 32573.1 i/s
|
51
|
+
sixarm: 8024.7 i/s - 4.06x (± 0.00) slower
|
52
|
+
|
53
|
+
Calculating -------------------------------------
|
54
|
+
unaccent 5.947k memsize ( 0.000 retained)
|
55
|
+
76.000 objects ( 0.000 retained)
|
56
|
+
42.000 strings ( 0.000 retained)
|
57
|
+
sixarm 29.979k memsize ( 0.000 retained)
|
58
|
+
633.000 objects ( 0.000 retained)
|
59
|
+
50.000 strings ( 0.000 retained)
|
60
|
+
|
61
|
+
Comparison:
|
62
|
+
unaccent: 5947 allocated
|
63
|
+
sixarm: 29979 allocated - 5.04x more
|
60
64
|
```
|
61
65
|
|
62
66
|
## Development
|
data/lib/unaccent/accentmap.rb
CHANGED
data/lib/unaccent/string.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'unaccent'
|
2
4
|
|
3
5
|
module Unaccent
|
@@ -5,18 +7,6 @@ module Unaccent
|
|
5
7
|
def unaccent
|
6
8
|
Unaccent.unaccent(self)
|
7
9
|
end
|
8
|
-
|
9
|
-
def unaccent_via_gsub
|
10
|
-
Unaccent.via_gsub(self)
|
11
|
-
end
|
12
|
-
|
13
|
-
def unaccent_via_each_char
|
14
|
-
Unaccent.via_each_char(self)
|
15
|
-
end
|
16
|
-
|
17
|
-
def unaccent_via_split_map
|
18
|
-
Unaccent.via_split_map(self)
|
19
|
-
end
|
20
10
|
end
|
21
11
|
end
|
22
12
|
|
data/lib/unaccent/version.rb
CHANGED
data/lib/unaccent.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'unaccent/version'
|
3
4
|
|
4
5
|
module Unaccent
|
6
|
+
autoload :ACCENTMAP, 'unaccent/accentmap'
|
7
|
+
|
5
8
|
class << self
|
6
9
|
# Replace a string's accented characters with unaccented characters.
|
7
10
|
#
|
@@ -12,67 +15,9 @@ module Unaccent
|
|
12
15
|
# @return [String] a string that has no accents
|
13
16
|
|
14
17
|
def unaccent(str)
|
15
|
-
via_gsub(str)
|
16
|
-
end
|
17
|
-
|
18
|
-
# Replace a string's accented characters with unaccented characters,
|
19
|
-
# by using string `#gsub` to replace non-ascii characters.
|
20
|
-
#
|
21
|
-
# @example
|
22
|
-
# str = 'Å Ç ß'
|
23
|
-
# Unaccent.via_gsub(str) = > 'AA C ss'
|
24
|
-
#
|
25
|
-
# @return [String] a string that has no accents
|
26
|
-
|
27
|
-
def via_gsub(str)
|
28
|
-
return str if str.ascii_only?
|
29
|
-
|
30
|
-
str.gsub(/[^[:ascii:]]/) { |c| ACCENTMAP.fetch(c, c) }
|
31
|
-
end
|
32
|
-
|
33
|
-
# Replace a string's accented characters with unaccented characters,
|
34
|
-
# by using string `#scan` to iterate on characters.
|
35
|
-
#
|
36
|
-
# @example
|
37
|
-
# str = 'Å Ç ß'
|
38
|
-
# Unaccent.via_scan(str) = > 'AA C ss'
|
39
|
-
#
|
40
|
-
# @return [String] a string that has no accents
|
41
|
-
|
42
|
-
def via_scan(str)
|
43
|
-
return str if str.ascii_only?
|
44
|
-
|
45
|
-
res = ''; str.scan(/./) { |c| res << ACCENTMAP.fetch(c, c) }; res
|
46
|
-
end
|
47
|
-
|
48
|
-
# Replace a string's accented characters with unaccented characters,
|
49
|
-
# by using string `#each_char` to iterate on characters.
|
50
|
-
#
|
51
|
-
# @example
|
52
|
-
# str = 'Å Ç ß'
|
53
|
-
# Unaccent.via_each_char(str) = > 'AA C ss'
|
54
|
-
#
|
55
|
-
# @return [String] a string that has no accents
|
56
|
-
|
57
|
-
def via_each_char(str)
|
58
|
-
return str if str.ascii_only?
|
59
|
-
|
60
|
-
res = ''; str.each_char { |c| res << ACCENTMAP.fetch(c, c) }; res
|
61
|
-
end
|
62
|
-
|
63
|
-
# Replace a string's accented characters with unaccented characters,
|
64
|
-
# by using string `#split` and `#map` to iterate on characters.
|
65
|
-
#
|
66
|
-
# @example
|
67
|
-
# str = 'Å Ç ß'
|
68
|
-
# Unaccent.via_split_map(str) = > 'AA C ss'
|
69
|
-
#
|
70
|
-
# @return [String] a string that has no accents
|
71
|
-
|
72
|
-
def via_split_map(str)
|
73
18
|
return str if str.ascii_only?
|
74
19
|
|
75
|
-
str.
|
20
|
+
str.gsub(/[^[:ascii:]]/) { |char| ACCENTMAP.fetch(char, char) }
|
76
21
|
end
|
77
22
|
end
|
78
23
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unaccent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonian Guveli
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -59,22 +59,13 @@ executables: []
|
|
59
59
|
extensions: []
|
60
60
|
extra_rdoc_files: []
|
61
61
|
files:
|
62
|
-
- ".github/workflows/build.yml"
|
63
|
-
- ".gitignore"
|
64
|
-
- ".ruby-version"
|
65
62
|
- ".yardopts"
|
66
|
-
- Gemfile
|
67
63
|
- LICENSE.txt
|
68
64
|
- README.md
|
69
|
-
- Rakefile
|
70
|
-
- bin/benchmark
|
71
|
-
- bin/console
|
72
|
-
- bin/setup
|
73
65
|
- lib/unaccent.rb
|
74
66
|
- lib/unaccent/accentmap.rb
|
75
67
|
- lib/unaccent/string.rb
|
76
68
|
- lib/unaccent/version.rb
|
77
|
-
- unaccent.gemspec
|
78
69
|
homepage: https://github.com/hardpixel/unaccent
|
79
70
|
licenses:
|
80
71
|
- MIT
|
@@ -87,14 +78,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
87
78
|
requirements:
|
88
79
|
- - ">="
|
89
80
|
- !ruby/object:Gem::Version
|
90
|
-
version: '
|
81
|
+
version: '2.6'
|
91
82
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
83
|
requirements:
|
93
84
|
- - ">="
|
94
85
|
- !ruby/object:Gem::Version
|
95
86
|
version: '0'
|
96
87
|
requirements: []
|
97
|
-
rubygems_version: 3.3.
|
88
|
+
rubygems_version: 3.3.14
|
98
89
|
signing_key:
|
99
90
|
specification_version: 4
|
100
91
|
summary: Replace accented characters with unaccented characters
|
data/.github/workflows/build.yml
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
---
|
2
|
-
name: Build
|
3
|
-
|
4
|
-
on:
|
5
|
-
push:
|
6
|
-
branches: [master]
|
7
|
-
paths-ignore:
|
8
|
-
- 'README.md'
|
9
|
-
- 'LICENSE.txt'
|
10
|
-
- 'bin/**'
|
11
|
-
pull_request:
|
12
|
-
branches: [master]
|
13
|
-
|
14
|
-
permissions:
|
15
|
-
contents: read
|
16
|
-
|
17
|
-
jobs:
|
18
|
-
build:
|
19
|
-
strategy:
|
20
|
-
matrix:
|
21
|
-
os: [ubuntu-latest]
|
22
|
-
ruby: ['2.7', '3.1']
|
23
|
-
runs-on: ${{ matrix.os }}
|
24
|
-
|
25
|
-
steps:
|
26
|
-
- uses: actions/checkout@v3
|
27
|
-
|
28
|
-
- name: Set up Ruby
|
29
|
-
uses: ruby/setup-ruby@v1
|
30
|
-
with:
|
31
|
-
ruby-version: ${{ matrix.ruby }}
|
32
|
-
bundler-cache: true
|
33
|
-
|
34
|
-
- name: Run tests
|
35
|
-
run: |
|
36
|
-
bundle exec rake test
|
data/.gitignore
DELETED
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
3.1.2
|
data/Gemfile
DELETED
data/Rakefile
DELETED
data/bin/benchmark
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'bundler/inline'
|
4
|
-
require 'tempfile'
|
5
|
-
|
6
|
-
gemfile true, quiet: true do
|
7
|
-
source 'https://rubygems.org'
|
8
|
-
gemspec
|
9
|
-
|
10
|
-
gem 'benchmark-ips'
|
11
|
-
gem 'benchmark-memory'
|
12
|
-
gem 'sixarm_ruby_unaccent'
|
13
|
-
end
|
14
|
-
|
15
|
-
STRINGS = [
|
16
|
-
'String without accents in english',
|
17
|
-
'Streng med aksenter på norsk',
|
18
|
-
'Řetězec s akcenty v češtině',
|
19
|
-
'Chaîne avec accents en français',
|
20
|
-
'Cuerda con acentos en español',
|
21
|
-
'Corda amb accents en català',
|
22
|
-
'Stīga ar akcentiem latviešu valodā',
|
23
|
-
'Türkçe aksanlı dize',
|
24
|
-
'Varg me theks në shqip',
|
25
|
-
'Κείμενο με τόνους στα ελληνικά'
|
26
|
-
]
|
27
|
-
|
28
|
-
reports = lambda do |x|
|
29
|
-
x.report('gsub') do
|
30
|
-
STRINGS.each do |str|
|
31
|
-
Unaccent.via_gsub(str)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
x.report('scan') do
|
36
|
-
STRINGS.each do |str|
|
37
|
-
Unaccent.via_scan(str)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
x.report('each_char') do
|
42
|
-
STRINGS.each do |str|
|
43
|
-
Unaccent.via_each_char(str)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
x.report('split_map') do
|
48
|
-
STRINGS.each do |str|
|
49
|
-
Unaccent.via_split_map(str)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
|
54
|
-
x.report('scan (sixarm)') do
|
55
|
-
STRINGS.each do |str|
|
56
|
-
str.unaccent_via_scan
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
x.report('each_char (sixarm)') do
|
61
|
-
STRINGS.each do |str|
|
62
|
-
str.unaccent_via_each_char
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
x.report('split_map (sixarm)') do
|
67
|
-
STRINGS.each do |str|
|
68
|
-
str.unaccent_via_split_map
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
Benchmark.ips do |x|
|
74
|
-
reports.call(x)
|
75
|
-
x.compare!
|
76
|
-
end
|
77
|
-
|
78
|
-
Benchmark.memory do |x|
|
79
|
-
reports.call(x)
|
80
|
-
x.compare!
|
81
|
-
end
|
data/bin/console
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'bundler/setup'
|
4
|
-
require 'unaccent'
|
5
|
-
|
6
|
-
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
-
# with your gem easier. You can also use a different console, if you like.
|
8
|
-
|
9
|
-
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
# require 'pry'
|
11
|
-
# Pry.start
|
12
|
-
|
13
|
-
require 'irb'
|
14
|
-
IRB.start(__FILE__)
|
data/bin/setup
DELETED
data/unaccent.gemspec
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
require_relative 'lib/unaccent/version'
|
2
|
-
|
3
|
-
Gem::Specification.new do |spec|
|
4
|
-
spec.name = 'unaccent'
|
5
|
-
spec.version = Unaccent::VERSION
|
6
|
-
spec.authors = ['Jonian Guveli']
|
7
|
-
spec.email = ['jonian@hardpixel.eu']
|
8
|
-
|
9
|
-
spec.summary = 'Replace accented characters with unaccented characters'
|
10
|
-
spec.description = 'Replace accented characters with unaccented characters.'
|
11
|
-
spec.homepage = 'https://github.com/hardpixel/unaccent'
|
12
|
-
spec.license = 'MIT'
|
13
|
-
|
14
|
-
# Specify which files should be added to the gem when it is released.
|
15
|
-
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
16
|
-
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
17
|
-
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
|
-
end
|
19
|
-
|
20
|
-
spec.require_paths = ['lib']
|
21
|
-
|
22
|
-
spec.add_development_dependency 'bundler', '~> 2.0'
|
23
|
-
spec.add_development_dependency 'minitest', '~> 5.0'
|
24
|
-
spec.add_development_dependency 'rake', '~> 13.0'
|
25
|
-
end
|