unf_ext 0.0.8.2.beta-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.github/workflows/unf_ext.yml +43 -0
- data/.gitignore +19 -0
- data/CHANGELOG.md +62 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +57 -0
- data/Rakefile +37 -0
- data/ext/unf_ext/extconf.rb +50 -0
- data/ext/unf_ext/unf/normalizer.hh +139 -0
- data/ext/unf_ext/unf/table.hh +13617 -0
- data/ext/unf_ext/unf/trie/char_stream.hh +150 -0
- data/ext/unf_ext/unf/trie/node.hh +25 -0
- data/ext/unf_ext/unf/trie/searcher.hh +194 -0
- data/ext/unf_ext/unf/util.hh +24 -0
- data/ext/unf_ext/unf.cc +75 -0
- data/lib/3.1/unf_ext.so +0 -0
- data/lib/unf_ext/version.rb +5 -0
- data/lib/unf_ext.rb +5 -0
- data/test/helper.rb +18 -0
- data/test/normalization-test.txt +112332 -0
- data/test/test_unf_ext.rb +40 -0
- data/unf_ext.gemspec +34 -0
- metadata +157 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0df2b2042b01091aaf309d44de895e1085657b92f1fe7f37c87beb41c0242f59
|
4
|
+
data.tar.gz: a9b406b2790051a65f2fedb2942bfecb6fa46836d25f096e1e5519380a4d57e8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 607c0bf6f8c11b205c7b755525ead44b5e90b21dcebfb90e0932b1f5ae09c79e0b92d8e804f8fec28c92296198bd7514097c98cb2b904bfac6a0886ccd6973ce
|
7
|
+
data.tar.gz: 12fc5c1d7f14828b9e07c4a986cd0f60084211685e6339ff63cdede0bab2bc41c9a1c1f774b207f7c3fbcffc774ed7a20ec88dd2d6635f3d77aebbdc132e720a
|
data/.document
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
name: >-
|
8
|
+
${{ matrix.os }} ${{ matrix.ruby }}
|
9
|
+
|
10
|
+
runs-on: ${{ matrix.os }}
|
11
|
+
strategy:
|
12
|
+
fail-fast: false
|
13
|
+
matrix:
|
14
|
+
os: [ ubuntu-20.04, macos-11.0, windows-2019 ]
|
15
|
+
ruby: [ 2.6, 2.7, "3.0", 3.1, head ]
|
16
|
+
include:
|
17
|
+
- { os: windows-2019, ruby: mingw }
|
18
|
+
exclude:
|
19
|
+
- { os: windows-2019, ruby: head }
|
20
|
+
|
21
|
+
steps:
|
22
|
+
- name: repo checkout
|
23
|
+
uses: actions/checkout@v2
|
24
|
+
|
25
|
+
- name: load ruby cross-compilation toolkit
|
26
|
+
uses: MSP-Greg/setup-ruby-pkgs@v1
|
27
|
+
with:
|
28
|
+
ruby-version: ${{ matrix.ruby }}
|
29
|
+
mingw: _upgrade_
|
30
|
+
|
31
|
+
- name: bundle install
|
32
|
+
shell: pwsh
|
33
|
+
run: |
|
34
|
+
bundle config set --local path .bundle/vendor
|
35
|
+
bundle install --jobs 4 --retry 3
|
36
|
+
|
37
|
+
- name: compile
|
38
|
+
timeout-minutes: 5
|
39
|
+
run: bundle exec rake compile
|
40
|
+
|
41
|
+
- name: test
|
42
|
+
timeout-minutes: 5
|
43
|
+
run: bundle exec rake test
|
data/.gitignore
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
## 0.0.8.2 (2022-04-13)
|
2
|
+
|
3
|
+
- Add x64-mingw-ucrt native gem support for RubyInstaller 3.1.
|
4
|
+
|
5
|
+
## 0.0.8.1 (2022-03-13)
|
6
|
+
|
7
|
+
- Include Windows binaries for Ruby 3.1. (This attempt failed; see 0.0.8.2.)
|
8
|
+
|
9
|
+
## 0.0.8 (2021-09-14)
|
10
|
+
|
11
|
+
- No functional change in the library code.
|
12
|
+
- Include Windows binaries for Ruby 3.0.
|
13
|
+
- Drop support for Ruby 2.1 and earlier.
|
14
|
+
- Replace Travis CI with GitHub Actions.
|
15
|
+
- Fix cross-build after upgrading rake-compiler/rake-compiler-dock to 1.1.1/1.1.0.
|
16
|
+
|
17
|
+
## 0.0.7.7 (2020-03-30)
|
18
|
+
|
19
|
+
- Include Windows binaries for Ruby 2.7.
|
20
|
+
|
21
|
+
## 0.0.7.6 (2019-03-19)
|
22
|
+
|
23
|
+
- Include Windows binaries for Ruby 2.6.
|
24
|
+
|
25
|
+
## 0.0.7.5 (2018-02-06)
|
26
|
+
|
27
|
+
- Include Windows binaries for Ruby 2.5.
|
28
|
+
|
29
|
+
## 0.0.7.4 (2017-04-19)
|
30
|
+
|
31
|
+
- Fix build on ARM and GCC 6 again.
|
32
|
+
|
33
|
+
## 0.0.7.3 (2017-04-11)
|
34
|
+
|
35
|
+
- Update the base Unicode version to 9.
|
36
|
+
|
37
|
+
- Fix compile issues on ARM and GCC 6.
|
38
|
+
|
39
|
+
## 0.0.7.2 (2016-02-01)
|
40
|
+
|
41
|
+
- Include Windows binaries for Ruby 2.3.
|
42
|
+
|
43
|
+
## 0.0.7.1 (2015-04-18)
|
44
|
+
|
45
|
+
- Windows fat binary gems no longer require libstdc++ to
|
46
|
+
run, since it is now statically linked.
|
47
|
+
|
48
|
+
- Add a fat binary gem for x64-mingw32 (64bit Windows).
|
49
|
+
|
50
|
+
- Windows fat binary gems now include binaries for Ruby up to 2.2.
|
51
|
+
|
52
|
+
## 0.0.6 (2013-02-16)
|
53
|
+
|
54
|
+
- Migrate from Jeweler to Bundler.
|
55
|
+
|
56
|
+
## 0.0.5 (2012-05-30)
|
57
|
+
|
58
|
+
- Fix a type error for strict compilers.
|
59
|
+
|
60
|
+
## 0.0.4 (2011-12-08)
|
61
|
+
|
62
|
+
- Release under the current name of `unf_ext`.
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2010 Takeru Ohta <phjgt308@gmail.com>
|
4
|
+
Copyright (c) 2011-2018 Akinori MUSHA <knu@idaemons.org> (extended Ruby support)
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
14
|
+
all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
22
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
ruby-unf_ext
|
2
|
+
============
|
3
|
+
|
4
|
+
Synopsis
|
5
|
+
--------
|
6
|
+
|
7
|
+
* Unicode Normalization Form support library for CRuby
|
8
|
+
|
9
|
+
Description
|
10
|
+
-----------
|
11
|
+
|
12
|
+
* Normalizes UTF-8 strings into and from NFC, NFD, NFKC or NFKD
|
13
|
+
|
14
|
+
# For bulk conversion
|
15
|
+
normalizer = UNF::Normalizer.new
|
16
|
+
a_bunch_of_strings.map! { |string|
|
17
|
+
normalizer.normalize(string, :nfc) #=> string in NFC
|
18
|
+
}
|
19
|
+
|
20
|
+
* Compliant with Unicode 9.0
|
21
|
+
|
22
|
+
Requirement
|
23
|
+
-----------
|
24
|
+
|
25
|
+
* Ruby 2.2+ (support for Ruby 2.1 and earlier was dropped in 0.0.8)
|
26
|
+
|
27
|
+
* C++ compiler and libstdc++
|
28
|
+
|
29
|
+
Installation
|
30
|
+
------------
|
31
|
+
|
32
|
+
gem install unf_ext
|
33
|
+
|
34
|
+
Or:
|
35
|
+
|
36
|
+
ruby extconf.rb && make && make install
|
37
|
+
|
38
|
+
Development Resources
|
39
|
+
---------------------
|
40
|
+
|
41
|
+
* https://github.com/sile/unf
|
42
|
+
|
43
|
+
For issues regarding files under the directory `unf`, please
|
44
|
+
contact this upstream.
|
45
|
+
|
46
|
+
* https://github.com/knu/ruby-unf_ext
|
47
|
+
|
48
|
+
The development site and the repository.
|
49
|
+
|
50
|
+
License
|
51
|
+
-------
|
52
|
+
|
53
|
+
Copyright (c) 2010-2017 Takeru Ohta
|
54
|
+
Copyright (c) 2011-2018 Akinori MUSHA
|
55
|
+
|
56
|
+
Licensed under the MIT license.
|
57
|
+
See `LICENSE.txt` for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# Rake tasks for building, cross-compiling and testing the unf_ext gem.
require 'bundler/gem_tasks'
require 'rake/extensiontask'
require 'rake/testtask'

gemspec = Bundler::GemHelper.gemspec

# Windows platforms for which precompiled (native) gems are produced.
native_platforms = ['x86-mingw32', 'x64-mingw32', 'x64-mingw-ucrt']

# Extension build task; the cross builds link libgcc and libstdc++ statically.
Rake::ExtensionTask.new('unf_ext', gemspec) do |extension|
  extension.cross_compile = true
  extension.cross_platform = native_platforms
  extension.cross_config_options.push(
    '--with-ldflags="-static-libgcc"',
    '--with-static-libstdc++'
  )
end

namespace :gem do
  # Builds one native gem per platform through rake-compiler-dock.
  task :native do
    require 'rake_compiler_dock'
    sh 'bundle package --all'
    native_platforms.each { |plat|
      RakeCompilerDock.sh "bundle --local && rake native:#{plat} gem", platform: plat
    }
  end
end

# Builds the source gem followed by all native gems.
task gems: %i[build gem:native]

Rake::TestTask.new(:test) do |t|
  t.libs << 'test'
  t.test_files = gemspec.test_files
  t.verbose = true
end

task default: :test
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# mkmf build configuration for the unf_ext C++ extension.
#
# Chooses how libstdc++ is linked, applies platform-specific compiler flags,
# generates the Makefile, and makes sure the Makefile names a C++ compiler.
require 'mkmf'

if with_config('static-libstdc++')
  # Ask the C compiler where libstdc++.a lives and pass that path to the
  # linker so the C++ runtime is linked statically.
  $LDFLAGS << ' ' << `#{CONFIG['CC']} -print-file-name=libstdc++.a`.chomp
else
  have_library('stdc++')

  case RbConfig::CONFIG['host_os']
  when /solaris2\.11/
    # Do a little trickery here to enable C++ standard on Solaris 11 if found.
    # This also forces 64bit compilation mode.
    # NOTE: $CXX aliases CONFIG['CXX'], so the append below modifies the
    # CONFIG entry in place.
    $CXX = CONFIG['CXX']
    $CXX << ' ' << '-m64'
    # NOTE(review): all three flag sets below are derived from
    # CONFIG['CFLAGS']; that appears to be part of the trick, confirm before
    # changing.
    $CFLAGS = CONFIG['CFLAGS'].gsub(/-std=c99/, '')
    $CFLAGS << ' ' << '-m64 -std=c++11'
    $CPPFLAGS = CONFIG['CFLAGS'].gsub(/-std=c99/, '')
    $CPPFLAGS << ' ' << '-m64 -std=c++11'
    $CXXFLAGS = CONFIG['CFLAGS'].gsub(/-std=c99/, '')
    $CXXFLAGS << ' ' << '-m64 -std=c++11'
  when /aix/
    # Compiler flags necessary on AIX.
    # rubocop:disable Style/GlobalVars
    $CFLAGS << ' ' << '-D_ALL_SOURCE=1'
    $CPPFLAGS << ' ' << '-D_ALL_SOURCE=1'
    $CXXFLAGS << ' ' << '-D_ALL_SOURCE=1'
  end
end

create_makefile 'unf_ext'

# When the Ruby build configuration names no C++ compiler, derive one from
# the C compiler (gcc -> g++, clang -> clang++, keeping any directory/prefix
# and version suffix) and prepend a CXX assignment to the generated Makefile.
unless CONFIG['CXX']
  case CONFIG['CC']
  when %r{((?:.*[-/])?)gcc([-0-9.]*)$}
    cxx = $1 + 'g++' + $2
  when %r{((?:.*[-/])?)clang([-0-9.]*)$}
    cxx = $1 + 'clang++' + $2
  else
    cxx = CONFIG['CC']
  end

  warn "CXX is automatically set to #{cxx}"

  # The existing Makefile is read in full before it is reopened for writing.
  File.write('Makefile', "CXX=#{cxx}\n" + File.read('Makefile'))
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
#ifndef UNF_NORMALIZER_HH
|
2
|
+
#define UNF_NORMALIZER_HH
|
3
|
+
|
4
|
+
#include <vector>
|
5
|
+
#include <string>
|
6
|
+
#include <algorithm>
|
7
|
+
#include <cstring>
|
8
|
+
#include "trie/searcher.hh"
|
9
|
+
#include "trie/char_stream.hh"
|
10
|
+
#include "table.hh"
|
11
|
+
#include "util.hh"
|
12
|
+
|
13
|
+
namespace UNF {
|
14
|
+
class Normalizer {
|
15
|
+
public:
|
16
|
+
enum Form { FORM_NFD, FORM_NFC, FORM_NFKD, FORM_NFKC };
|
17
|
+
|
18
|
+
public:
|
19
|
+
Normalizer()
|
20
|
+
: nf_d(TABLE::NODES, TABLE::CANONICAL_DECOM_ROOT, (const char *)TABLE::STRINGS),
|
21
|
+
nf_kd(TABLE::NODES, TABLE::COMPATIBILITY_DECOM_ROOT, (const char *)TABLE::STRINGS),
|
22
|
+
nf_c(TABLE::NODES, TABLE::CANONICAL_COM_ROOT, (const char *)TABLE::STRINGS),
|
23
|
+
nf_c_qc(TABLE::NODES, TABLE::NFC_ILLEGAL_ROOT),
|
24
|
+
nf_kc_qc(TABLE::NODES, TABLE::NFKC_ILLEGAL_ROOT),
|
25
|
+
ccc(TABLE::NODES, TABLE::CANONICAL_CLASS_ROOT)
|
26
|
+
{}
|
27
|
+
|
28
|
+
const char* normalize(const char* src, Form form) {
|
29
|
+
switch(form) {
|
30
|
+
case FORM_NFD: return nfd(src);
|
31
|
+
case FORM_NFC: return nfc(src);
|
32
|
+
case FORM_NFKD: return nfkd(src);
|
33
|
+
case FORM_NFKC: return nfkc(src);
|
34
|
+
default: return src;
|
35
|
+
}
|
36
|
+
}
|
37
|
+
const char* nfd(const char* src) { return decompose(src, nf_d); }
|
38
|
+
const char* nfkd(const char* src) { return decompose(src, nf_kd); }
|
39
|
+
const char* nfc(const char* src) { return compose(src, nf_c_qc, nf_d); }
|
40
|
+
const char* nfkc(const char* src) { return compose(src, nf_kc_qc, nf_kd); }
|
41
|
+
|
42
|
+
private:
|
43
|
+
const char* decompose(const char* src, const Trie::NormalizationForm& nf) {
|
44
|
+
const char* beg = next_invalid_char(src, nf);
|
45
|
+
if(*beg=='\0')
|
46
|
+
return src;
|
47
|
+
|
48
|
+
buffer.assign(src, beg);
|
49
|
+
do {
|
50
|
+
const char* end = next_valid_starter(beg, nf);
|
51
|
+
decompose_one(beg, end, nf, buffer);
|
52
|
+
beg = next_invalid_char(end, nf);
|
53
|
+
buffer.append(end, beg);
|
54
|
+
} while(*beg!='\0');
|
55
|
+
|
56
|
+
return buffer.c_str();
|
57
|
+
}
|
58
|
+
|
59
|
+
void decompose_one(const char* beg, const char* end, const Trie::NormalizationForm& nf, std::string& buf) {
|
60
|
+
unsigned last = buf.size();
|
61
|
+
nf.decompose(Trie::RangeCharStream(beg,end), buf);
|
62
|
+
char* bufbeg = const_cast<char*>(buf.data());
|
63
|
+
canonical_combining_class_ordering(bufbeg+last, bufbeg+buf.size());
|
64
|
+
}
|
65
|
+
|
66
|
+
const char* compose(const char* src, const Trie::NormalizationForm& nf, const Trie::NormalizationForm& nf_decomp) {
|
67
|
+
const char* beg = next_invalid_char(src, nf);
|
68
|
+
if(*beg=='\0')
|
69
|
+
return src;
|
70
|
+
|
71
|
+
buffer.assign(src, beg);
|
72
|
+
while(*beg!='\0') {
|
73
|
+
const char* end = next_valid_starter(beg, nf);
|
74
|
+
buffer2.clear();
|
75
|
+
decompose_one(beg, end, nf_decomp, buffer2);
|
76
|
+
end = compose_one(buffer2.c_str(), end, buffer);
|
77
|
+
beg = next_invalid_char(end, nf);
|
78
|
+
buffer.append(end, beg);
|
79
|
+
}
|
80
|
+
|
81
|
+
return buffer.c_str();
|
82
|
+
}
|
83
|
+
|
84
|
+
const char* compose_one(const char* starter, const char* rest_starter, std::string& buf) {
|
85
|
+
Trie::CharStreamForComposition in(starter, rest_starter, canonical_classes, buffer3);
|
86
|
+
while(in.within_first())
|
87
|
+
nf_c.compose(in, buf);
|
88
|
+
return in.cur();
|
89
|
+
}
|
90
|
+
|
91
|
+
void canonical_combining_class_ordering(char* beg, const char* end) {
|
92
|
+
canonical_classes.assign(end-beg+1, 0); // +1 is for sentinel value
|
93
|
+
ccc.sort(beg, canonical_classes);
|
94
|
+
}
|
95
|
+
|
96
|
+
const char* next_invalid_char(const char* src, const Trie::NormalizationForm& nf) const {
|
97
|
+
int last_canonical_class = 0;
|
98
|
+
const char* cur = Util::nearest_utf8_char_start_point(src);
|
99
|
+
const char* starter = cur;
|
100
|
+
|
101
|
+
for(; *cur != '\0'; cur = Util::nearest_utf8_char_start_point(cur+1)) {
|
102
|
+
int canonical_class = ccc.get_class(cur);
|
103
|
+
if(last_canonical_class > canonical_class && canonical_class != 0)
|
104
|
+
return starter;
|
105
|
+
|
106
|
+
if(nf.quick_check(cur)==false)
|
107
|
+
return starter;
|
108
|
+
|
109
|
+
if(canonical_class==0)
|
110
|
+
starter=cur;
|
111
|
+
|
112
|
+
last_canonical_class = canonical_class;
|
113
|
+
}
|
114
|
+
return cur;
|
115
|
+
}
|
116
|
+
|
117
|
+
const char* next_valid_starter(const char* src, const Trie::NormalizationForm& nf) const {
|
118
|
+
const char* cur = Util::nearest_utf8_char_start_point(src+1);
|
119
|
+
while(ccc.get_class(cur)!=0 || nf.quick_check(cur)==false)
|
120
|
+
cur = Util::nearest_utf8_char_start_point(cur+1);
|
121
|
+
return cur;
|
122
|
+
}
|
123
|
+
|
124
|
+
private:
|
125
|
+
const Trie::NormalizationForm nf_d;
|
126
|
+
const Trie::NormalizationForm nf_kd;
|
127
|
+
const Trie::NormalizationForm nf_c;
|
128
|
+
const Trie::NormalizationForm nf_c_qc;
|
129
|
+
const Trie::NormalizationForm nf_kc_qc;
|
130
|
+
const Trie::CanonicalCombiningClass ccc;
|
131
|
+
|
132
|
+
std::string buffer;
|
133
|
+
std::string buffer2;
|
134
|
+
std::string buffer3;
|
135
|
+
std::vector<unsigned char> canonical_classes;
|
136
|
+
};
|
137
|
+
}
|
138
|
+
|
139
|
+
#endif
|