unaccent 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9659e66d5944e31de52b6ae6127f839eafffe2f0647705135d5d292913d5f0a0
4
- data.tar.gz: d7c141e5078de647653ddcb88f21acd9fdcce8bc49f5e4f618d0371f159f00a8
3
+ metadata.gz: 850131686c6638f3c48c8441ab13c2b4a6220b752aaaab56e89fe19a274715fc
4
+ data.tar.gz: 2a026d497598b7303b6ee54b0d9f46b10d7015fff20ed2ad80b060448f638b42
5
5
  SHA512:
6
- metadata.gz: d8791eb553569a93862373effcd0a3be832c05a85526c5b7d9d97218eab245b06c371eef607110958f1f8a12feef3af01d94f3b90bf892aa8c7e41c155d9d1bf
7
- data.tar.gz: d3555caf43ca4d7d7e7ebbc526bd745482003d856cbc449183e031c30f53a3d33f975dc0070f17c9c85fd22068036e84617ded5031e063c19582272834aa8321
6
+ metadata.gz: dc135100fc86657bd4538ce6a9081a8e02e279d4903b52361b99b88d5d517a6247c08b2590abdbf726e3e27d4c77afb9036bdc2163cc1d1afc2ff541fafb895a
7
+ data.tar.gz: a52a5e96a933058c774dc4e35b5a12f9982597cdf3511a6e3db9c6d299f07e337e24ad5455e3cf5e2f33caf270ec0e4426230ef589ec5d022216724b0fb52a87
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2010-2020 Joel Parker Henderson
3
+ Copyright (c) 2010-2019 Joel Parker Henderson
4
4
  Copyright (c) 2022 Jonian Guveli
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Unaccent
2
2
 
3
- Ruby gem to replace a string's accent characters with ASCII characters. Based on [SixArm Ruby Unaccent](https://github.com/SixArm/sixarm_ruby_unaccent).
3
+ Ruby gem to replace a string's accented characters with unaccented characters. Based on [SixArm Ruby Unaccent](https://github.com/SixArm/sixarm_ruby_unaccent).
4
4
 
5
5
  [![Gem Version](https://badge.fury.io/rb/unaccent.svg)](https://badge.fury.io/rb/unaccent)
6
6
  [![Build](https://github.com/hardpixel/unaccent/actions/workflows/build.yml/badge.svg)](https://github.com/hardpixel/unaccent/actions/workflows/build.yml)
@@ -39,24 +39,28 @@ require 'unaccent/string'
39
39
  ## Benchmark
40
40
 
41
41
  ```
42
- IPS Comparison:
43
- gsub: 33804.1 i/s
44
- each_char: 16639.9 i/s - 2.03x (± 0.00) slower
45
- each_char (sixarm): 13708.2 i/s - 2.47x (± 0.00) slower
46
- scan: 10390.0 i/s - 3.25x (± 0.00) slower
47
- scan (sixarm): 8765.8 i/s - 3.86x (± 0.00) slower
48
- split_map: 7800.9 i/s - 4.33x (± 0.00) slower
49
- split_map (sixarm): 6780.7 i/s - 4.99x (± 0.00) slower
50
-
51
-
52
- Memory Comparison:
53
- gsub: 5947 allocated
54
- each_char: 10704 allocated - 1.80x more
55
- scan: 15368 allocated - 2.58x more
56
- split_map: 20582 allocated - 3.46x more
57
- split_map (sixarm): 23185 allocated - 3.90x more
58
- each_char (sixarm): 24979 allocated - 4.20x more
59
- scan (sixarm): 29979 allocated - 5.04x more
42
+ Warming up --------------------------------------
43
+ unaccent 3.259k i/100ms
44
+ sixarm 838.000 i/100ms
45
+ Calculating -------------------------------------
46
+ unaccent 32.573k (± 2.1%) i/s - 162.950k in 5.004780s
47
+ sixarm 8.025k (± 4.6%) i/s - 40.224k in 5.023339s
48
+
49
+ Comparison:
50
+ unaccent: 32573.1 i/s
51
+ sixarm: 8024.7 i/s - 4.06x (± 0.00) slower
52
+
53
+ Calculating -------------------------------------
54
+ unaccent 5.947k memsize ( 0.000 retained)
55
+ 76.000 objects ( 0.000 retained)
56
+ 42.000 strings ( 0.000 retained)
57
+ sixarm 29.979k memsize ( 0.000 retained)
58
+ 633.000 objects ( 0.000 retained)
59
+ 50.000 strings ( 0.000 retained)
60
+
61
+ Comparison:
62
+ unaccent: 5947 allocated
63
+ sixarm: 29979 allocated - 5.04x more
60
64
  ```
61
65
 
62
66
  ## Development
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Unaccent
2
4
  ACCENTMAP = {
3
5
  # 00A0 NO-BREAK SPACE
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'unaccent'
2
4
 
3
5
  module Unaccent
@@ -5,18 +7,6 @@ module Unaccent
5
7
  def unaccent
6
8
  Unaccent.unaccent(self)
7
9
  end
8
-
9
- def unaccent_via_gsub
10
- Unaccent.via_gsub(self)
11
- end
12
-
13
- def unaccent_via_each_char
14
- Unaccent.via_each_char(self)
15
- end
16
-
17
- def unaccent_via_split_map
18
- Unaccent.via_split_map(self)
19
- end
20
10
  end
21
11
  end
22
12
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Unaccent
2
- VERSION = '0.2.0'.freeze
4
+ VERSION = '0.3.0'
3
5
  end
data/lib/unaccent.rb CHANGED
@@ -1,7 +1,10 @@
1
- require 'unaccent/accentmap'
1
+ # frozen_string_literal: true
2
+
2
3
  require 'unaccent/version'
3
4
 
4
5
  module Unaccent
6
+ autoload :ACCENTMAP, 'unaccent/accentmap'
7
+
5
8
  class << self
6
9
  # Replace a string's accented characters with unaccented characters.
7
10
  #
@@ -12,67 +15,9 @@ module Unaccent
12
15
  # @return [String] a string that has no accents
13
16
 
14
17
  def unaccent(str)
15
- via_gsub(str)
16
- end
17
-
18
- # Replace a string's accented characters with unaccented characters,
19
- # by using string `#gsub` to replace non-ascii characters.
20
- #
21
- # @example
22
- # str = 'Å Ç ß'
23
- # Unaccent.via_gsub(str) = > 'AA C ss'
24
- #
25
- # @return [String] a string that has no accents
26
-
27
- def via_gsub(str)
28
- return str if str.ascii_only?
29
-
30
- str.gsub(/[^[:ascii:]]/) { |c| ACCENTMAP.fetch(c, c) }
31
- end
32
-
33
- # Replace a string's accented characters with unaccented characters,
34
- # by using string `#scan` to iterate on characters.
35
- #
36
- # @example
37
- # str = 'Å Ç ß'
38
- # Unaccent.via_scan(str) = > 'AA C ss'
39
- #
40
- # @return [String] a string that has no accents
41
-
42
- def via_scan(str)
43
- return str if str.ascii_only?
44
-
45
- res = ''; str.scan(/./) { |c| res << ACCENTMAP.fetch(c, c) }; res
46
- end
47
-
48
- # Replace a string's accented characters with unaccented characters,
49
- # by using string `#each_char` to iterate on characters.
50
- #
51
- # @example
52
- # str = 'Å Ç ß'
53
- # Unaccent.via_each_char(str) = > 'AA C ss'
54
- #
55
- # @return [String] a string that has no accents
56
-
57
- def via_each_char(str)
58
- return str if str.ascii_only?
59
-
60
- res = ''; str.each_char { |c| res << ACCENTMAP.fetch(c, c) }; res
61
- end
62
-
63
- # Replace a string's accented characters with unaccented characters,
64
- # by using string `#split` and `#map` to iterate on characters.
65
- #
66
- # @example
67
- # str = 'Å Ç ß'
68
- # Unaccent.via_split_map(str) = > 'AA C ss'
69
- #
70
- # @return [String] a string that has no accents
71
-
72
- def via_split_map(str)
73
18
  return str if str.ascii_only?
74
19
 
75
- str.split(//u).map { |c| ACCENTMAP.fetch(c, c) }.join
20
+ str.gsub(/[^[:ascii:]]/) { |char| ACCENTMAP.fetch(char, char) }
76
21
  end
77
22
  end
78
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unaccent
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonian Guveli
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-27 00:00:00.000000000 Z
11
+ date: 2022-08-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -59,22 +59,13 @@ executables: []
59
59
  extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
- - ".github/workflows/build.yml"
63
- - ".gitignore"
64
- - ".ruby-version"
65
62
  - ".yardopts"
66
- - Gemfile
67
63
  - LICENSE.txt
68
64
  - README.md
69
- - Rakefile
70
- - bin/benchmark
71
- - bin/console
72
- - bin/setup
73
65
  - lib/unaccent.rb
74
66
  - lib/unaccent/accentmap.rb
75
67
  - lib/unaccent/string.rb
76
68
  - lib/unaccent/version.rb
77
- - unaccent.gemspec
78
69
  homepage: https://github.com/hardpixel/unaccent
79
70
  licenses:
80
71
  - MIT
@@ -87,14 +78,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
87
78
  requirements:
88
79
  - - ">="
89
80
  - !ruby/object:Gem::Version
90
- version: '0'
81
+ version: '2.6'
91
82
  required_rubygems_version: !ruby/object:Gem::Requirement
92
83
  requirements:
93
84
  - - ">="
94
85
  - !ruby/object:Gem::Version
95
86
  version: '0'
96
87
  requirements: []
97
- rubygems_version: 3.3.7
88
+ rubygems_version: 3.3.14
98
89
  signing_key:
99
90
  specification_version: 4
100
91
  summary: Replace accented characters with unaccented characters
@@ -1,36 +0,0 @@
1
- ---
2
- name: Build
3
-
4
- on:
5
- push:
6
- branches: [master]
7
- paths-ignore:
8
- - 'README.md'
9
- - 'LICENSE.txt'
10
- - 'bin/**'
11
- pull_request:
12
- branches: [master]
13
-
14
- permissions:
15
- contents: read
16
-
17
- jobs:
18
- build:
19
- strategy:
20
- matrix:
21
- os: [ubuntu-latest]
22
- ruby: ['2.7', '3.1']
23
- runs-on: ${{ matrix.os }}
24
-
25
- steps:
26
- - uses: actions/checkout@v3
27
-
28
- - name: Set up Ruby
29
- uses: ruby/setup-ruby@v1
30
- with:
31
- ruby-version: ${{ matrix.ruby }}
32
- bundler-cache: true
33
-
34
- - name: Run tests
35
- run: |
36
- bundle exec rake test
data/.gitignore DELETED
@@ -1,9 +0,0 @@
1
- /.bundle/
2
- /.yardoc
3
- /_yardoc/
4
- /coverage/
5
- /doc/
6
- /pkg/
7
- /spec/reports/
8
- /tmp/
9
- Gemfile.lock
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 3.1.2
data/Gemfile DELETED
@@ -1,4 +0,0 @@
1
- source 'https://rubygems.org'
2
-
3
- # Specify your gem's dependencies in buildex.gemspec
4
- gemspec
data/Rakefile DELETED
@@ -1,8 +0,0 @@
1
- require 'bundler/gem_tasks'
2
- require 'rake/testtask'
3
-
4
- Rake::TestTask.new(:test) do |t|
5
- t.libs << 'test'
6
- t.libs << 'lib'
7
- t.test_files = FileList['test/**/*_test.rb']
8
- end
data/bin/benchmark DELETED
@@ -1,81 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'bundler/inline'
4
- require 'tempfile'
5
-
6
- gemfile true, quiet: true do
7
- source 'https://rubygems.org'
8
- gemspec
9
-
10
- gem 'benchmark-ips'
11
- gem 'benchmark-memory'
12
- gem 'sixarm_ruby_unaccent'
13
- end
14
-
15
- STRINGS = [
16
- 'String without accents in english',
17
- 'Streng med aksenter på norsk',
18
- 'Řetězec s akcenty v češtině',
19
- 'Chaîne avec accents en français',
20
- 'Cuerda con acentos en español',
21
- 'Corda amb accents en català',
22
- 'Stīga ar akcentiem latviešu valodā',
23
- 'Türkçe aksanlı dize',
24
- 'Varg me theks në shqip',
25
- 'Κείμενο με τόνους στα ελληνικά'
26
- ]
27
-
28
- reports = lambda do |x|
29
- x.report('gsub') do
30
- STRINGS.each do |str|
31
- Unaccent.via_gsub(str)
32
- end
33
- end
34
-
35
- x.report('scan') do
36
- STRINGS.each do |str|
37
- Unaccent.via_scan(str)
38
- end
39
- end
40
-
41
- x.report('each_char') do
42
- STRINGS.each do |str|
43
- Unaccent.via_each_char(str)
44
- end
45
- end
46
-
47
- x.report('split_map') do
48
- STRINGS.each do |str|
49
- Unaccent.via_split_map(str)
50
- end
51
- end
52
-
53
-
54
- x.report('scan (sixarm)') do
55
- STRINGS.each do |str|
56
- str.unaccent_via_scan
57
- end
58
- end
59
-
60
- x.report('each_char (sixarm)') do
61
- STRINGS.each do |str|
62
- str.unaccent_via_each_char
63
- end
64
- end
65
-
66
- x.report('split_map (sixarm)') do
67
- STRINGS.each do |str|
68
- str.unaccent_via_split_map
69
- end
70
- end
71
- end
72
-
73
- Benchmark.ips do |x|
74
- reports.call(x)
75
- x.compare!
76
- end
77
-
78
- Benchmark.memory do |x|
79
- reports.call(x)
80
- x.compare!
81
- end
data/bin/console DELETED
@@ -1,14 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'bundler/setup'
4
- require 'unaccent'
5
-
6
- # You can add fixtures and/or initialization code here to make experimenting
7
- # with your gem easier. You can also use a different console, if you like.
8
-
9
- # (If you use this, don't forget to add pry to your Gemfile!)
10
- # require 'pry'
11
- # Pry.start
12
-
13
- require 'irb'
14
- IRB.start(__FILE__)
data/bin/setup DELETED
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here
data/unaccent.gemspec DELETED
@@ -1,25 +0,0 @@
1
- require_relative 'lib/unaccent/version'
2
-
3
- Gem::Specification.new do |spec|
4
- spec.name = 'unaccent'
5
- spec.version = Unaccent::VERSION
6
- spec.authors = ['Jonian Guveli']
7
- spec.email = ['jonian@hardpixel.eu']
8
-
9
- spec.summary = 'Replace accented characters with unaccented characters'
10
- spec.description = 'Replace accented characters with unaccented characters.'
11
- spec.homepage = 'https://github.com/hardpixel/unaccent'
12
- spec.license = 'MIT'
13
-
14
- # Specify which files should be added to the gem when it is released.
15
- # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
16
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
17
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
- end
19
-
20
- spec.require_paths = ['lib']
21
-
22
- spec.add_development_dependency 'bundler', '~> 2.0'
23
- spec.add_development_dependency 'minitest', '~> 5.0'
24
- spec.add_development_dependency 'rake', '~> 13.0'
25
- end