unaccent 0.1.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +2 -0
- data/CODE_OF_CONDUCT.md +37 -0
- data/LICENSE.txt +1 -0
- data/README.md +23 -19
- data/lib/unaccent/accentmap.rb +4 -2
- data/lib/unaccent/string.rb +6 -13
- data/lib/unaccent/version.rb +3 -1
- data/lib/unaccent.rb +8 -62
- metadata +20 -13
- data/.github/workflows/build.yml +0 -36
- data/.gitignore +0 -9
- data/.ruby-version +0 -1
- data/Gemfile +0 -4
- data/Rakefile +0 -8
- data/bin/benchmark +0 -81
- data/bin/console +0 -14
- data/bin/setup +0 -8
- data/unaccent.gemspec +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f56e5660dbb7639bb9ef70628cd7ecbcfc2c7bc7ae7bdb69e2a8ab2e5c9638e
|
4
|
+
data.tar.gz: 54bd414076246db4389c6646200c59eebd1372c445f3813b723ab29da3634dde
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d8a3cfb89f5771db43ebf2aefa0e767d4777ebf762fe4847af25966df9b2cf69da28ddebc7f3c158bf8d069eba77f01174021e22b9dc267e8ddeb2334e31b28
|
7
|
+
data.tar.gz: 65937e76c7d787b76cd4d7a5509469ac00d6abfa6ceadb825b031c5b15ba9533c97de71439c46a73e34e58ae46f9ccb9a026622d1333e1779161ab42c04403cd
|
data/.yardopts
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# Code of Conduct
|
2
|
+
|
3
|
+
All participants of this project are expected to abide by our Code of Conduct, both online and during in-person events that are hosted and/or associated with this project.
|
4
|
+
|
5
|
+
## The Pledge
|
6
|
+
|
7
|
+
In the interest of fostering an open and welcoming environment, we pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
8
|
+
|
9
|
+
## The Standards
|
10
|
+
|
11
|
+
Examples of behavior that contributes to creating a positive environment include:
|
12
|
+
|
13
|
+
* Using welcoming and inclusive language
|
14
|
+
* Being respectful of differing viewpoints and experiences
|
15
|
+
* Referring to people by their preferred pronouns and using gender-neutral pronouns when uncertain
|
16
|
+
* Gracefully accepting constructive criticism
|
17
|
+
* Focusing on what is best for the community
|
18
|
+
* Showing empathy towards other community members
|
19
|
+
|
20
|
+
Examples of unacceptable behavior by participants include:
|
21
|
+
|
22
|
+
* The use of sexualized language or imagery and unwelcome sexual attention or advances
|
23
|
+
* Trolling, insulting/derogatory comments, and personal or political attacks
|
24
|
+
* Public or private harassment
|
25
|
+
* Publishing others' private information, such as a physical or electronic address, without explicit permission
|
26
|
+
* Other conduct which could reasonably be considered inappropriate in a professional setting
|
27
|
+
* Dismissing or attacking inclusion-oriented requests
|
28
|
+
|
29
|
+
## Enforcement
|
30
|
+
|
31
|
+
Violations of the Code of Conduct may be reported by sending an email to info@hardpixel.eu. All reports will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. Further details of specific enforcement policies may be posted separately.
|
32
|
+
|
33
|
+
We hold the right and responsibility to remove comments or other contributions that are not aligned to this Code of Conduct, or to suspend temporarily or permanently any members for other behaviors that are inappropriate, threatening, offensive, or harmful.
|
34
|
+
|
35
|
+
## Attribution
|
36
|
+
|
37
|
+
This Code of Conduct is adapted from [dev.to](https://dev.to/code-of-conduct).
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Unaccent
|
2
2
|
|
3
|
-
Ruby gem to replace a string's accent characters with
|
3
|
+
Ruby gem to replace a string's accent characters with unaccented characters. Based on [SixArm Ruby Unaccent](https://github.com/SixArm/sixarm_ruby_unaccent).
|
4
4
|
|
5
5
|
[![Gem Version](https://badge.fury.io/rb/unaccent.svg)](https://badge.fury.io/rb/unaccent)
|
6
6
|
[![Build](https://github.com/hardpixel/unaccent/actions/workflows/build.yml/badge.svg)](https://github.com/hardpixel/unaccent/actions/workflows/build.yml)
|
@@ -39,24 +39,28 @@ require 'unaccent/string'
|
|
39
39
|
## Benchmark
|
40
40
|
|
41
41
|
```
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
42
|
+
Warming up --------------------------------------
|
43
|
+
unaccent 3.259k i/100ms
|
44
|
+
sixarm 838.000 i/100ms
|
45
|
+
Calculating -------------------------------------
|
46
|
+
unaccent 32.573k (± 2.1%) i/s - 162.950k in 5.004780s
|
47
|
+
sixarm 8.025k (± 4.6%) i/s - 40.224k in 5.023339s
|
48
|
+
|
49
|
+
Comparison:
|
50
|
+
unaccent: 32573.1 i/s
|
51
|
+
sixarm: 8024.7 i/s - 4.06x (± 0.00) slower
|
52
|
+
|
53
|
+
Calculating -------------------------------------
|
54
|
+
unaccent 5.947k memsize ( 0.000 retained)
|
55
|
+
76.000 objects ( 0.000 retained)
|
56
|
+
42.000 strings ( 0.000 retained)
|
57
|
+
sixarm 29.979k memsize ( 0.000 retained)
|
58
|
+
633.000 objects ( 0.000 retained)
|
59
|
+
50.000 strings ( 0.000 retained)
|
60
|
+
|
61
|
+
Comparison:
|
62
|
+
unaccent: 5947 allocated
|
63
|
+
sixarm: 29979 allocated - 5.04x more
|
60
64
|
```
|
61
65
|
|
62
66
|
## Development
|
data/lib/unaccent/accentmap.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Unaccent
|
2
4
|
ACCENTMAP = {
|
3
5
|
# 00A0 NO-BREAK SPACE
|
@@ -5038,7 +5040,7 @@ module Unaccent
|
|
5038
5040
|
# 2103 DEGREE CELSIUS
|
5039
5041
|
# -> 00B0 DEGREE SIGN
|
5040
5042
|
# + 0043 LATIN CAPITAL LETTER C
|
5041
|
-
"\u{2103}" => "\u{
|
5043
|
+
"\u{2103}" => "\u{B0}C",
|
5042
5044
|
|
5043
5045
|
# 2105 CARE OF
|
5044
5046
|
# -> 0063 LATIN SMALL LETTER C
|
@@ -5059,7 +5061,7 @@ module Unaccent
|
|
5059
5061
|
# 2109 DEGREE FAHRENHEIT
|
5060
5062
|
# -> 00B0 DEGREE SIGN
|
5061
5063
|
# + 0046 LATIN CAPITAL LETTER F
|
5062
|
-
"\u{2109}" => "\u{
|
5064
|
+
"\u{2109}" => "\u{B0}F",
|
5063
5065
|
|
5064
5066
|
# 210A SCRIPT SMALL G
|
5065
5067
|
# -> 0067 LATIN SMALL LETTER G
|
data/lib/unaccent/string.rb
CHANGED
@@ -1,23 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'unaccent'
|
2
4
|
|
3
5
|
module Unaccent
|
6
|
+
# Extend the String class with unaccent method.
|
4
7
|
module String
|
5
8
|
def unaccent
|
6
9
|
Unaccent.unaccent(self)
|
7
10
|
end
|
8
|
-
|
9
|
-
def unaccent_via_gsub
|
10
|
-
Unaccent.via_gsub(self)
|
11
|
-
end
|
12
|
-
|
13
|
-
def unaccent_via_each_char
|
14
|
-
Unaccent.via_each_char(self)
|
15
|
-
end
|
16
|
-
|
17
|
-
def unaccent_via_split_map
|
18
|
-
Unaccent.via_split_map(self)
|
19
|
-
end
|
20
11
|
end
|
21
12
|
end
|
22
13
|
|
23
|
-
|
14
|
+
class String
|
15
|
+
include Unaccent::String
|
16
|
+
end
|
data/lib/unaccent/version.rb
CHANGED
data/lib/unaccent.rb
CHANGED
@@ -1,78 +1,24 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'unaccent/version'
|
3
4
|
|
5
|
+
# Replace accented characters with unaccented characters in a string.
|
4
6
|
module Unaccent
|
7
|
+
autoload :ACCENTMAP, 'unaccent/accentmap'
|
8
|
+
|
5
9
|
class << self
|
6
10
|
# Replace a string's accented characters with unaccented characters.
|
7
11
|
#
|
8
12
|
# @example
|
9
|
-
# s = 'Å Ç ß'
|
10
|
-
# Unaccent.unaccent(s) = > 'AA C ss'
|
11
|
-
#
|
12
|
-
# @return [String] a string that has no accents
|
13
|
-
|
14
|
-
def unaccent(str)
|
15
|
-
via_gsub(str)
|
16
|
-
end
|
17
|
-
|
18
|
-
# Replace a string's accented characters with unaccented characters,
|
19
|
-
# by using string `#gsub` to replace non-ascii characters.
|
20
|
-
#
|
21
|
-
# @example
|
22
13
|
# str = 'Å Ç ß'
|
23
|
-
# Unaccent.
|
14
|
+
# Unaccent.unaccent(str) = > 'AA C ss'
|
24
15
|
#
|
25
16
|
# @return [String] a string that has no accents
|
26
17
|
|
27
|
-
def
|
28
|
-
return str if str.ascii_only?
|
29
|
-
|
30
|
-
str.gsub(/[^[:ascii:]]/) { |c| ACCENTMAP.fetch(c, c) }
|
31
|
-
end
|
32
|
-
|
33
|
-
# Replace a string's accented characters with unaccented characters,
|
34
|
-
# by using string `#scan` to iterate on characters.
|
35
|
-
#
|
36
|
-
# @example
|
37
|
-
# str = 'Å Ç ß'
|
38
|
-
# Unaccent.via_scan(str) = > 'AA C ss'
|
39
|
-
#
|
40
|
-
# @return [String] a string that has no accents
|
41
|
-
|
42
|
-
def via_scan(str)
|
43
|
-
return str if str.ascii_only?
|
44
|
-
|
45
|
-
res = ''; str.scan(/./) { |c| res << ACCENTMAP.fetch(c, c) }; res
|
46
|
-
end
|
47
|
-
|
48
|
-
# Replace a string's accented characters with unaccented characters,
|
49
|
-
# by using string `#each_char` to iterate on characters.
|
50
|
-
#
|
51
|
-
# @example
|
52
|
-
# str = 'Å Ç ß'
|
53
|
-
# Unaccent.via_each_char(str) = > 'AA C ss'
|
54
|
-
#
|
55
|
-
# @return [String] a string that has no accents
|
56
|
-
|
57
|
-
def via_each_char(str)
|
58
|
-
return str if str.ascii_only?
|
59
|
-
|
60
|
-
res = ''; str.each_char { |c| res << ACCENTMAP.fetch(c, c) }; res
|
61
|
-
end
|
62
|
-
|
63
|
-
# Replace a string's accented characters with unaccented characters,
|
64
|
-
# by using string `#split` and `#map` to iterate on characters.
|
65
|
-
#
|
66
|
-
# @example
|
67
|
-
# str = 'Å Ç ß'
|
68
|
-
# Unaccent.via_split_map(str) = > 'AA C ss'
|
69
|
-
#
|
70
|
-
# @return [String] a string that has no accents
|
71
|
-
|
72
|
-
def via_split_map(str)
|
18
|
+
def unaccent(str)
|
73
19
|
return str if str.ascii_only?
|
74
20
|
|
75
|
-
str.
|
21
|
+
str.gsub(/[^[:ascii:]]/) { |char| ACCENTMAP.fetch(char, char) }
|
76
22
|
end
|
77
23
|
end
|
78
24
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unaccent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonian Guveli
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-08-
|
11
|
+
date: 2022-08-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '5.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '13.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '13.0'
|
41
55
|
description: Replace accented characters with unaccented characters.
|
42
56
|
email:
|
43
57
|
- jonian@hardpixel.eu
|
@@ -45,21 +59,14 @@ executables: []
|
|
45
59
|
extensions: []
|
46
60
|
extra_rdoc_files: []
|
47
61
|
files:
|
48
|
-
- ".
|
49
|
-
-
|
50
|
-
- ".ruby-version"
|
51
|
-
- Gemfile
|
62
|
+
- ".yardopts"
|
63
|
+
- CODE_OF_CONDUCT.md
|
52
64
|
- LICENSE.txt
|
53
65
|
- README.md
|
54
|
-
- Rakefile
|
55
|
-
- bin/benchmark
|
56
|
-
- bin/console
|
57
|
-
- bin/setup
|
58
66
|
- lib/unaccent.rb
|
59
67
|
- lib/unaccent/accentmap.rb
|
60
68
|
- lib/unaccent/string.rb
|
61
69
|
- lib/unaccent/version.rb
|
62
|
-
- unaccent.gemspec
|
63
70
|
homepage: https://github.com/hardpixel/unaccent
|
64
71
|
licenses:
|
65
72
|
- MIT
|
@@ -72,14 +79,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
72
79
|
requirements:
|
73
80
|
- - ">="
|
74
81
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
82
|
+
version: '2.6'
|
76
83
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
84
|
requirements:
|
78
85
|
- - ">="
|
79
86
|
- !ruby/object:Gem::Version
|
80
87
|
version: '0'
|
81
88
|
requirements: []
|
82
|
-
rubygems_version: 3.3.
|
89
|
+
rubygems_version: 3.3.7
|
83
90
|
signing_key:
|
84
91
|
specification_version: 4
|
85
92
|
summary: Replace accented characters with unaccented characters
|
data/.github/workflows/build.yml
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
---
|
2
|
-
name: Build
|
3
|
-
|
4
|
-
on:
|
5
|
-
push:
|
6
|
-
branches: [master]
|
7
|
-
paths-ignore:
|
8
|
-
- 'README.md'
|
9
|
-
- 'LICENSE.txt'
|
10
|
-
- 'bin/**'
|
11
|
-
pull_request:
|
12
|
-
branches: [master]
|
13
|
-
|
14
|
-
permissions:
|
15
|
-
contents: read
|
16
|
-
|
17
|
-
jobs:
|
18
|
-
build:
|
19
|
-
strategy:
|
20
|
-
matrix:
|
21
|
-
os: [ubuntu-latest]
|
22
|
-
ruby: ['2.7']
|
23
|
-
runs-on: ${{ matrix.os }}
|
24
|
-
|
25
|
-
steps:
|
26
|
-
- uses: actions/checkout@v3
|
27
|
-
|
28
|
-
- name: Set up Ruby
|
29
|
-
uses: ruby/setup-ruby@v1
|
30
|
-
with:
|
31
|
-
ruby-version: ${{ matrix.ruby }}
|
32
|
-
bundler-cache: true
|
33
|
-
|
34
|
-
- name: Run tests
|
35
|
-
run: |
|
36
|
-
bundle exec rake test
|
data/.gitignore
DELETED
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
3.1.2
|
data/Gemfile
DELETED
data/Rakefile
DELETED
data/bin/benchmark
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'bundler/inline'
|
4
|
-
require 'tempfile'
|
5
|
-
|
6
|
-
gemfile true, quiet: true do
|
7
|
-
source 'https://rubygems.org'
|
8
|
-
gemspec
|
9
|
-
|
10
|
-
gem 'benchmark-ips'
|
11
|
-
gem 'benchmark-memory'
|
12
|
-
gem 'sixarm_ruby_unaccent'
|
13
|
-
end
|
14
|
-
|
15
|
-
STRINGS = [
|
16
|
-
'String without accents in english',
|
17
|
-
'Streng med aksenter på norsk',
|
18
|
-
'Řetězec s akcenty v češtině',
|
19
|
-
'Chaîne avec accents en français',
|
20
|
-
'Cuerda con acentos en español',
|
21
|
-
'Corda amb accents en català',
|
22
|
-
'Stīga ar akcentiem latviešu valodā',
|
23
|
-
'Türkçe aksanlı dize',
|
24
|
-
'Varg me theks në shqip',
|
25
|
-
'Κείμενο με τόνους στα ελληνικά'
|
26
|
-
]
|
27
|
-
|
28
|
-
reports = lambda do |x|
|
29
|
-
x.report('gsub') do
|
30
|
-
STRINGS.each do |str|
|
31
|
-
Unaccent.via_gsub(str)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
x.report('scan') do
|
36
|
-
STRINGS.each do |str|
|
37
|
-
Unaccent.via_scan(str)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
x.report('each_char') do
|
42
|
-
STRINGS.each do |str|
|
43
|
-
Unaccent.via_each_char(str)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
x.report('split_map') do
|
48
|
-
STRINGS.each do |str|
|
49
|
-
Unaccent.via_split_map(str)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
|
54
|
-
x.report('scan (sixarm)') do
|
55
|
-
STRINGS.each do |str|
|
56
|
-
str.unaccent_via_scan
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
x.report('each_char (sixarm)') do
|
61
|
-
STRINGS.each do |str|
|
62
|
-
str.unaccent_via_each_char
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
x.report('split_map (sixarm)') do
|
67
|
-
STRINGS.each do |str|
|
68
|
-
str.unaccent_via_split_map
|
69
|
-
end
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
Benchmark.ips do |x|
|
74
|
-
reports.call(x)
|
75
|
-
x.compare!
|
76
|
-
end
|
77
|
-
|
78
|
-
Benchmark.memory do |x|
|
79
|
-
reports.call(x)
|
80
|
-
x.compare!
|
81
|
-
end
|
data/bin/console
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'bundler/setup'
|
4
|
-
require 'unaccent'
|
5
|
-
|
6
|
-
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
-
# with your gem easier. You can also use a different console, if you like.
|
8
|
-
|
9
|
-
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
# require 'pry'
|
11
|
-
# Pry.start
|
12
|
-
|
13
|
-
require 'irb'
|
14
|
-
IRB.start(__FILE__)
|
data/bin/setup
DELETED
data/unaccent.gemspec
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require_relative 'lib/unaccent/version'
|
2
|
-
|
3
|
-
Gem::Specification.new do |spec|
|
4
|
-
spec.name = 'unaccent'
|
5
|
-
spec.version = Unaccent::VERSION
|
6
|
-
spec.authors = ['Jonian Guveli']
|
7
|
-
spec.email = ['jonian@hardpixel.eu']
|
8
|
-
|
9
|
-
spec.summary = 'Replace accented characters with unaccented characters'
|
10
|
-
spec.description = 'Replace accented characters with unaccented characters.'
|
11
|
-
spec.homepage = 'https://github.com/hardpixel/unaccent'
|
12
|
-
spec.license = 'MIT'
|
13
|
-
|
14
|
-
# Specify which files should be added to the gem when it is released.
|
15
|
-
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
16
|
-
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
17
|
-
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
|
-
end
|
19
|
-
|
20
|
-
spec.require_paths = ['lib']
|
21
|
-
|
22
|
-
spec.add_development_dependency 'bundler', '~> 2.0'
|
23
|
-
spec.add_development_dependency 'minitest', '~> 5.0'
|
24
|
-
end
|