cld3 3.5.0 → 3.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -8
- data/cld3.gemspec +6 -6
- data/ext/cld3/extconf.rb +1 -2
- data/ext/cld3/nnet_language_identifier_c.cc +163 -70
- data/lib/cld3.rb +14 -102
- data/sig/cld3.rbs +2 -0
- metadata +15 -77
- data/ext/cld3/Makefile +0 -268
- data/ext/cld3/base.o +0 -0
- data/ext/cld3/embedding_feature_extractor.o +0 -0
- data/ext/cld3/embedding_network.o +0 -0
- data/ext/cld3/feature_extractor.o +0 -0
- data/ext/cld3/feature_types.o +0 -0
- data/ext/cld3/fixunicodevalue.o +0 -0
- data/ext/cld3/fml_parser.o +0 -0
- data/ext/cld3/generated_entities.o +0 -0
- data/ext/cld3/generated_ulscript.o +0 -0
- data/ext/cld3/getonescriptspan.o +0 -0
- data/ext/cld3/lang_id_nn_params.o +0 -0
- data/ext/cld3/language_identifier_features.o +0 -0
- data/ext/cld3/libcld3.def +0 -8
- data/ext/cld3/libcld3.so +0 -0
- data/ext/cld3/nnet_language_identifier.o +0 -0
- data/ext/cld3/nnet_language_identifier_c.o +0 -0
- data/ext/cld3/offsetmap.o +0 -0
- data/ext/cld3/registry.o +0 -0
- data/ext/cld3/relevant_script_feature.o +0 -0
- data/ext/cld3/script_span/fixunicodevalue.h +0 -69
- data/ext/cld3/script_span/generated_ulscript.h +0 -142
- data/ext/cld3/script_span/getonescriptspan.h +0 -124
- data/ext/cld3/script_span/integral_types.h +0 -37
- data/ext/cld3/script_span/offsetmap.h +0 -168
- data/ext/cld3/script_span/port.h +0 -143
- data/ext/cld3/script_span/stringpiece.h +0 -81
- data/ext/cld3/script_span/text_processing.h +0 -30
- data/ext/cld3/script_span/utf8acceptinterchange.h +0 -486
- data/ext/cld3/script_span/utf8prop_lettermarkscriptnum.h +0 -1631
- data/ext/cld3/script_span/utf8repl_lettermarklower.h +0 -758
- data/ext/cld3/script_span/utf8scannot_lettermarkspecial.h +0 -1455
- data/ext/cld3/script_span/utf8statetable.h +0 -285
- data/ext/cld3/sentence_features.o +0 -0
- data/ext/cld3/task_context.o +0 -0
- data/ext/cld3/task_context_params.o +0 -0
- data/ext/cld3/text_processing.o +0 -0
- data/ext/cld3/unicodetext.o +0 -0
- data/ext/cld3/utf8statetable.o +0 -0
- data/ext/cld3/utils.o +0 -0
- data/ext/cld3/workspace.o +0 -0
- data/lib/cld3/unstable.rb +0 -58
metadata
CHANGED
@@ -1,95 +1,75 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cld3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.5.0
|
4
|
+
version: 3.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Akihiko Odaki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: ffi
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 1.1.0
|
20
|
-
- - "<"
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: 1.16.0
|
23
|
-
type: :runtime
|
24
|
-
prerelease: false
|
25
|
-
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
requirements:
|
27
|
-
- - ">="
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: 1.1.0
|
30
|
-
- - "<"
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: 1.16.0
|
33
13
|
- !ruby/object:Gem::Dependency
|
34
14
|
name: rbs
|
35
15
|
requirement: !ruby/object:Gem::Requirement
|
36
16
|
requirements:
|
37
17
|
- - ">="
|
38
18
|
- !ruby/object:Gem::Version
|
39
|
-
version: 2.
|
19
|
+
version: 2.8.0
|
40
20
|
- - "<"
|
41
21
|
- !ruby/object:Gem::Version
|
42
|
-
version: 2.
|
22
|
+
version: 2.9.0
|
43
23
|
type: :development
|
44
24
|
prerelease: false
|
45
25
|
version_requirements: !ruby/object:Gem::Requirement
|
46
26
|
requirements:
|
47
27
|
- - ">="
|
48
28
|
- !ruby/object:Gem::Version
|
49
|
-
version: 2.
|
29
|
+
version: 2.8.0
|
50
30
|
- - "<"
|
51
31
|
- !ruby/object:Gem::Version
|
52
|
-
version: 2.
|
32
|
+
version: 2.9.0
|
53
33
|
- !ruby/object:Gem::Dependency
|
54
34
|
name: rspec
|
55
35
|
requirement: !ruby/object:Gem::Requirement
|
56
36
|
requirements:
|
57
37
|
- - ">="
|
58
38
|
- !ruby/object:Gem::Version
|
59
|
-
version: 3.
|
39
|
+
version: 3.12.0
|
60
40
|
- - "<"
|
61
41
|
- !ruby/object:Gem::Version
|
62
|
-
version: 3.
|
42
|
+
version: 3.13.0
|
63
43
|
type: :development
|
64
44
|
prerelease: false
|
65
45
|
version_requirements: !ruby/object:Gem::Requirement
|
66
46
|
requirements:
|
67
47
|
- - ">="
|
68
48
|
- !ruby/object:Gem::Version
|
69
|
-
version: 3.
|
49
|
+
version: 3.12.0
|
70
50
|
- - "<"
|
71
51
|
- !ruby/object:Gem::Version
|
72
|
-
version: 3.
|
52
|
+
version: 3.13.0
|
73
53
|
- !ruby/object:Gem::Dependency
|
74
54
|
name: steep
|
75
55
|
requirement: !ruby/object:Gem::Requirement
|
76
56
|
requirements:
|
77
57
|
- - ">="
|
78
58
|
- !ruby/object:Gem::Version
|
79
|
-
version: 1.
|
59
|
+
version: 1.3.0
|
80
60
|
- - "<"
|
81
61
|
- !ruby/object:Gem::Version
|
82
|
-
version: 1.
|
62
|
+
version: 1.4.0
|
83
63
|
type: :development
|
84
64
|
prerelease: false
|
85
65
|
version_requirements: !ruby/object:Gem::Requirement
|
86
66
|
requirements:
|
87
67
|
- - ">="
|
88
68
|
- !ruby/object:Gem::Version
|
89
|
-
version: 1.
|
69
|
+
version: 1.3.0
|
90
70
|
- - "<"
|
91
71
|
- !ruby/object:Gem::Version
|
92
|
-
version: 1.
|
72
|
+
version: 1.4.0
|
93
73
|
description: Compact Language Detector v3 (CLD3) is a neural network model for language
|
94
74
|
identification.
|
95
75
|
email: akihiko.odaki@gmail.com
|
@@ -103,113 +83,71 @@ files:
|
|
103
83
|
- LICENSE_CLD3
|
104
84
|
- README.md
|
105
85
|
- cld3.gemspec
|
106
|
-
- ext/cld3/Makefile
|
107
86
|
- ext/cld3/base.cc
|
108
87
|
- ext/cld3/base.h
|
109
|
-
- ext/cld3/base.o
|
110
88
|
- ext/cld3/casts.h
|
111
89
|
- ext/cld3/cld_3/protos/feature_extractor.pb.h
|
112
90
|
- ext/cld3/cld_3/protos/sentence.pb.h
|
113
91
|
- ext/cld3/cld_3/protos/task_spec.pb.h
|
114
92
|
- ext/cld3/embedding_feature_extractor.cc
|
115
93
|
- ext/cld3/embedding_feature_extractor.h
|
116
|
-
- ext/cld3/embedding_feature_extractor.o
|
117
94
|
- ext/cld3/embedding_network.cc
|
118
95
|
- ext/cld3/embedding_network.h
|
119
|
-
- ext/cld3/embedding_network.o
|
120
96
|
- ext/cld3/embedding_network_params.h
|
121
97
|
- ext/cld3/extconf.rb
|
122
98
|
- ext/cld3/feature_extractor.cc
|
123
99
|
- ext/cld3/feature_extractor.h
|
124
|
-
- ext/cld3/feature_extractor.o
|
125
100
|
- ext/cld3/feature_types.cc
|
126
101
|
- ext/cld3/feature_types.h
|
127
|
-
- ext/cld3/feature_types.o
|
128
102
|
- ext/cld3/fixunicodevalue.cc
|
129
103
|
- ext/cld3/fixunicodevalue.h
|
130
|
-
- ext/cld3/fixunicodevalue.o
|
131
104
|
- ext/cld3/float16.h
|
132
105
|
- ext/cld3/fml_parser.cc
|
133
106
|
- ext/cld3/fml_parser.h
|
134
|
-
- ext/cld3/fml_parser.o
|
135
107
|
- ext/cld3/generated_entities.cc
|
136
|
-
- ext/cld3/generated_entities.o
|
137
108
|
- ext/cld3/generated_ulscript.cc
|
138
109
|
- ext/cld3/generated_ulscript.h
|
139
|
-
- ext/cld3/generated_ulscript.o
|
140
110
|
- ext/cld3/getonescriptspan.cc
|
141
111
|
- ext/cld3/getonescriptspan.h
|
142
|
-
- ext/cld3/getonescriptspan.o
|
143
112
|
- ext/cld3/integral_types.h
|
144
113
|
- ext/cld3/lang_id_nn_params.cc
|
145
114
|
- ext/cld3/lang_id_nn_params.h
|
146
|
-
- ext/cld3/lang_id_nn_params.o
|
147
115
|
- ext/cld3/language_identifier_features.cc
|
148
116
|
- ext/cld3/language_identifier_features.h
|
149
|
-
- ext/cld3/language_identifier_features.o
|
150
|
-
- ext/cld3/libcld3.def
|
151
|
-
- ext/cld3/libcld3.so
|
152
117
|
- ext/cld3/nnet_language_identifier.cc
|
153
118
|
- ext/cld3/nnet_language_identifier.h
|
154
|
-
- ext/cld3/nnet_language_identifier.o
|
155
119
|
- ext/cld3/nnet_language_identifier_c.cc
|
156
|
-
- ext/cld3/nnet_language_identifier_c.o
|
157
120
|
- ext/cld3/offsetmap.cc
|
158
121
|
- ext/cld3/offsetmap.h
|
159
|
-
- ext/cld3/offsetmap.o
|
160
122
|
- ext/cld3/port.h
|
161
123
|
- ext/cld3/registry.cc
|
162
124
|
- ext/cld3/registry.h
|
163
|
-
- ext/cld3/registry.o
|
164
125
|
- ext/cld3/relevant_script_feature.cc
|
165
126
|
- ext/cld3/relevant_script_feature.h
|
166
|
-
- ext/cld3/relevant_script_feature.o
|
167
127
|
- ext/cld3/script_detector.h
|
168
|
-
- ext/cld3/script_span/fixunicodevalue.h
|
169
|
-
- ext/cld3/script_span/generated_ulscript.h
|
170
|
-
- ext/cld3/script_span/getonescriptspan.h
|
171
|
-
- ext/cld3/script_span/integral_types.h
|
172
|
-
- ext/cld3/script_span/offsetmap.h
|
173
|
-
- ext/cld3/script_span/port.h
|
174
|
-
- ext/cld3/script_span/stringpiece.h
|
175
|
-
- ext/cld3/script_span/text_processing.h
|
176
|
-
- ext/cld3/script_span/utf8acceptinterchange.h
|
177
|
-
- ext/cld3/script_span/utf8prop_lettermarkscriptnum.h
|
178
|
-
- ext/cld3/script_span/utf8repl_lettermarklower.h
|
179
|
-
- ext/cld3/script_span/utf8scannot_lettermarkspecial.h
|
180
|
-
- ext/cld3/script_span/utf8statetable.h
|
181
128
|
- ext/cld3/sentence_features.cc
|
182
129
|
- ext/cld3/sentence_features.h
|
183
|
-
- ext/cld3/sentence_features.o
|
184
130
|
- ext/cld3/simple_adder.h
|
185
131
|
- ext/cld3/stringpiece.h
|
186
132
|
- ext/cld3/task_context.cc
|
187
133
|
- ext/cld3/task_context.h
|
188
|
-
- ext/cld3/task_context.o
|
189
134
|
- ext/cld3/task_context_params.cc
|
190
135
|
- ext/cld3/task_context_params.h
|
191
|
-
- ext/cld3/task_context_params.o
|
192
136
|
- ext/cld3/text_processing.cc
|
193
137
|
- ext/cld3/text_processing.h
|
194
|
-
- ext/cld3/text_processing.o
|
195
138
|
- ext/cld3/unicodetext.cc
|
196
139
|
- ext/cld3/unicodetext.h
|
197
|
-
- ext/cld3/unicodetext.o
|
198
140
|
- ext/cld3/utf8acceptinterchange.h
|
199
141
|
- ext/cld3/utf8prop_lettermarkscriptnum.h
|
200
142
|
- ext/cld3/utf8repl_lettermarklower.h
|
201
143
|
- ext/cld3/utf8scannot_lettermarkspecial.h
|
202
144
|
- ext/cld3/utf8statetable.cc
|
203
145
|
- ext/cld3/utf8statetable.h
|
204
|
-
- ext/cld3/utf8statetable.o
|
205
146
|
- ext/cld3/utils.cc
|
206
147
|
- ext/cld3/utils.h
|
207
|
-
- ext/cld3/utils.o
|
208
148
|
- ext/cld3/workspace.cc
|
209
149
|
- ext/cld3/workspace.h
|
210
|
-
- ext/cld3/workspace.o
|
211
150
|
- lib/cld3.rb
|
212
|
-
- lib/cld3/unstable.rb
|
213
151
|
- sig/cld3.rbs
|
214
152
|
homepage: https://github.com/akihikodaki/cld3-ruby
|
215
153
|
licenses:
|
@@ -233,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
233
171
|
- !ruby/object:Gem::Version
|
234
172
|
version: '0'
|
235
173
|
requirements: []
|
236
|
-
rubygems_version: 3.3.
|
174
|
+
rubygems_version: 3.3.25
|
237
175
|
signing_key:
|
238
176
|
specification_version: 4
|
239
177
|
summary: Compact Language Detector v3 (CLD3)
|
data/ext/cld3/Makefile
DELETED
@@ -1,268 +0,0 @@
|
|
1
|
-
|
2
|
-
SHELL = /bin/sh
|
3
|
-
|
4
|
-
# V=0 quiet, V=1 verbose. other values don't work.
|
5
|
-
V = 1
|
6
|
-
V0 = $(V:0=)
|
7
|
-
Q1 = $(V:1=)
|
8
|
-
Q = $(Q1:0=@)
|
9
|
-
ECHO1 = $(V:1=@ :)
|
10
|
-
ECHO = $(ECHO1:0=@ echo)
|
11
|
-
NULLCMD = :
|
12
|
-
|
13
|
-
#### Start of system configuration section. ####
|
14
|
-
|
15
|
-
srcdir = .
|
16
|
-
topdir = /usr/include
|
17
|
-
hdrdir = $(topdir)
|
18
|
-
arch_hdrdir = /usr/include
|
19
|
-
PATH_SEPARATOR = :
|
20
|
-
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
21
|
-
prefix = $(DESTDIR)/usr
|
22
|
-
rubysitearchprefix = $(sitearchlibdir)/$(RUBY_BASE_NAME)
|
23
|
-
rubyarchprefix = $(DESTDIR)/usr/lib64/ruby
|
24
|
-
rubylibprefix = $(exec_prefix)/share/ruby
|
25
|
-
exec_prefix = $(DESTDIR)/usr
|
26
|
-
vendorarchhdrdir = $(vendorhdrdir)/$(arch)
|
27
|
-
sitearchhdrdir = $(sitehdrdir)/$(arch)
|
28
|
-
rubyarchhdrdir = $(DESTDIR)/usr/include
|
29
|
-
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
|
30
|
-
sitehdrdir = $(rubyhdrdir)/site_ruby
|
31
|
-
rubyhdrdir = $(DESTDIR)/usr/include
|
32
|
-
rubygemsdir = $(DESTDIR)/usr/share/rubygems
|
33
|
-
vendorarchdir = $(DESTDIR)/usr/lib64/ruby/vendor_ruby
|
34
|
-
vendorlibdir = $(vendordir)
|
35
|
-
vendordir = $(DESTDIR)/usr/share/ruby/vendor_ruby
|
36
|
-
sitearchdir = $(DESTDIR)/usr/local/lib64/ruby/site_ruby
|
37
|
-
sitelibdir = $(sitedir)
|
38
|
-
sitedir = $(DESTDIR)/usr/local/share/ruby/site_ruby
|
39
|
-
rubyarchdir = $(rubyarchprefix)
|
40
|
-
rubylibdir = $(rubylibprefix)
|
41
|
-
sitearchincludedir = $(includedir)/$(sitearch)
|
42
|
-
archincludedir = $(includedir)/$(arch)
|
43
|
-
sitearchlibdir = $(libdir)/$(sitearch)
|
44
|
-
archlibdir = $(DESTDIR)/usr/lib64
|
45
|
-
ridir = $(datarootdir)/$(RI_BASE_NAME)
|
46
|
-
mandir = $(DESTDIR)/usr/share/man
|
47
|
-
localedir = $(datarootdir)/locale
|
48
|
-
libdir = $(exec_prefix)/lib64
|
49
|
-
psdir = $(docdir)
|
50
|
-
pdfdir = $(docdir)
|
51
|
-
dvidir = $(docdir)
|
52
|
-
htmldir = $(docdir)
|
53
|
-
infodir = $(DESTDIR)/usr/share/info
|
54
|
-
docdir = $(datarootdir)/doc/$(PACKAGE)
|
55
|
-
oldincludedir = $(DESTDIR)/usr/include
|
56
|
-
includedir = $(exec_prefix)/include
|
57
|
-
runstatedir = $(localstatedir)/run
|
58
|
-
localstatedir = $(DESTDIR)/var
|
59
|
-
sharedstatedir = $(DESTDIR)/var/lib
|
60
|
-
sysconfdir = $(DESTDIR)/etc
|
61
|
-
datadir = $(DESTDIR)/usr/share
|
62
|
-
datarootdir = $(prefix)/share
|
63
|
-
libexecdir = $(DESTDIR)/usr/libexec
|
64
|
-
sbindir = $(DESTDIR)/usr/sbin
|
65
|
-
bindir = $(exec_prefix)/bin
|
66
|
-
archdir = $(rubyarchdir)
|
67
|
-
|
68
|
-
|
69
|
-
CC_WRAPPER =
|
70
|
-
CC = gcc
|
71
|
-
CXX = g++
|
72
|
-
LIBRUBY = $(LIBRUBY_SO)
|
73
|
-
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
74
|
-
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
75
|
-
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static $(MAINLIBS)
|
76
|
-
empty =
|
77
|
-
OUTFLAG = -o $(empty)
|
78
|
-
COUTFLAG = -o $(empty)
|
79
|
-
CSRCFLAG = $(empty)
|
80
|
-
|
81
|
-
RUBY_EXTCONF_H =
|
82
|
-
cflags = $(optflags) $(debugflags) $(warnflags)
|
83
|
-
cxxflags =
|
84
|
-
optflags = -O3 -fno-fast-math
|
85
|
-
debugflags = -ggdb3
|
86
|
-
warnflags = -Wall -Wextra -Wdeprecated-declarations -Wduplicated-cond -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wwrite-strings -Wold-style-definition -Wimplicit-fallthrough=0 -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-packed-bitfield-compat -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wsuggest-attribute=format -Wsuggest-attribute=noreturn -Wunused-variable -Wundef
|
87
|
-
cppflags =
|
88
|
-
CCDLFLAGS = -fPIC
|
89
|
-
CFLAGS = $(CCDLFLAGS) -O2 -flto=auto -ffat-lto-objects -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fPIC $(ARCH_FLAG)
|
90
|
-
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
|
91
|
-
DEFS =
|
92
|
-
CPPFLAGS = $(DEFS) $(cppflags)
|
93
|
-
CXXFLAGS = $(CCDLFLAGS) -O2 -flto=auto -ffat-lto-objects -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fvisibility=hidden -std=c++17 $(ARCH_FLAG)
|
94
|
-
ldflags = -L. -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -Wl,--build-id=sha1 -fstack-protector-strong -rdynamic -Wl,-export-dynamic -Wl,--no-as-needed
|
95
|
-
dldflags = -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -Wl,--build-id=sha1
|
96
|
-
ARCH_FLAG =
|
97
|
-
DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
|
98
|
-
LDSHARED = $(CC) -shared
|
99
|
-
LDSHAREDXX = $(CXX) -shared
|
100
|
-
AR = gcc-ar
|
101
|
-
EXEEXT =
|
102
|
-
|
103
|
-
RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
|
104
|
-
RUBY_SO_NAME = ruby
|
105
|
-
RUBYW_INSTALL_NAME =
|
106
|
-
RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version_dir_name)
|
107
|
-
RUBYW_BASE_NAME = rubyw
|
108
|
-
RUBY_BASE_NAME = ruby
|
109
|
-
|
110
|
-
arch = aarch64-linux
|
111
|
-
sitearch = $(arch)
|
112
|
-
ruby_version = 3.1.0
|
113
|
-
ruby = $(bindir)/$(RUBY_BASE_NAME)
|
114
|
-
RUBY = $(ruby)
|
115
|
-
ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
|
116
|
-
|
117
|
-
RM = rm -f
|
118
|
-
RM_RF = rm -fr
|
119
|
-
RMDIRS = rmdir --ignore-fail-on-non-empty -p
|
120
|
-
MAKEDIRS = /usr/bin/mkdir -p
|
121
|
-
INSTALL = /usr/bin/install -c
|
122
|
-
INSTALL_PROG = $(INSTALL) -m 0755
|
123
|
-
INSTALL_DATA = $(INSTALL) -m 644
|
124
|
-
COPY = cp
|
125
|
-
TOUCH = exit >
|
126
|
-
|
127
|
-
#### End of system configuration section. ####
|
128
|
-
|
129
|
-
preload =
|
130
|
-
libpath = . $(archlibdir)
|
131
|
-
LIBPATH = -L. -L$(archlibdir)
|
132
|
-
DEFFILE =
|
133
|
-
|
134
|
-
CLEANFILES = mkmf.log
|
135
|
-
DISTCLEANFILES =
|
136
|
-
DISTCLEANDIRS =
|
137
|
-
|
138
|
-
extout =
|
139
|
-
extout_prefix =
|
140
|
-
target_prefix =
|
141
|
-
LOCAL_LIBS =
|
142
|
-
LIBS = -lm -lc
|
143
|
-
ORIG_SRCS = base.cc embedding_feature_extractor.cc embedding_network.cc feature_extractor.cc feature_types.cc fixunicodevalue.cc fml_parser.cc generated_entities.cc generated_ulscript.cc getonescriptspan.cc lang_id_nn_params.cc language_identifier_features.cc nnet_language_identifier.cc nnet_language_identifier_c.cc offsetmap.cc registry.cc relevant_script_feature.cc sentence_features.cc task_context.cc task_context_params.cc text_processing.cc unicodetext.cc utf8statetable.cc utils.cc workspace.cc
|
144
|
-
SRCS = $(ORIG_SRCS)
|
145
|
-
OBJS = base.o embedding_feature_extractor.o embedding_network.o feature_extractor.o feature_types.o fixunicodevalue.o fml_parser.o generated_entities.o generated_ulscript.o getonescriptspan.o lang_id_nn_params.o language_identifier_features.o nnet_language_identifier.o nnet_language_identifier_c.o offsetmap.o registry.o relevant_script_feature.o sentence_features.o task_context.o task_context_params.o text_processing.o unicodetext.o utf8statetable.o utils.o workspace.o
|
146
|
-
HDRS = $(srcdir)/base.h $(srcdir)/casts.h $(srcdir)/embedding_feature_extractor.h $(srcdir)/embedding_network.h $(srcdir)/embedding_network_params.h $(srcdir)/feature_extractor.h $(srcdir)/feature_types.h $(srcdir)/fixunicodevalue.h $(srcdir)/float16.h $(srcdir)/fml_parser.h $(srcdir)/generated_ulscript.h $(srcdir)/getonescriptspan.h $(srcdir)/integral_types.h $(srcdir)/lang_id_nn_params.h $(srcdir)/language_identifier_features.h $(srcdir)/nnet_language_identifier.h $(srcdir)/offsetmap.h $(srcdir)/port.h $(srcdir)/registry.h $(srcdir)/relevant_script_feature.h $(srcdir)/script_detector.h $(srcdir)/sentence_features.h $(srcdir)/simple_adder.h $(srcdir)/stringpiece.h $(srcdir)/task_context.h $(srcdir)/task_context_params.h $(srcdir)/text_processing.h $(srcdir)/unicodetext.h $(srcdir)/utf8acceptinterchange.h $(srcdir)/utf8prop_lettermarkscriptnum.h $(srcdir)/utf8repl_lettermarklower.h $(srcdir)/utf8scannot_lettermarkspecial.h $(srcdir)/utf8statetable.h $(srcdir)/utils.h $(srcdir)/workspace.h
|
147
|
-
LOCAL_HDRS =
|
148
|
-
TARGET = libcld3
|
149
|
-
TARGET_NAME = libcld3
|
150
|
-
TARGET_ENTRY = Init_$(TARGET_NAME)
|
151
|
-
DLLIB = $(TARGET).so
|
152
|
-
EXTSTATIC =
|
153
|
-
STATIC_LIB =
|
154
|
-
|
155
|
-
TIMESTAMP_DIR = .
|
156
|
-
BINDIR = $(bindir)
|
157
|
-
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
158
|
-
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
159
|
-
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
160
|
-
HDRDIR = $(sitehdrdir)$(target_prefix)
|
161
|
-
ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
|
162
|
-
TARGET_SO_DIR =
|
163
|
-
TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
|
164
|
-
CLEANLIBS = $(TARGET_SO) false
|
165
|
-
CLEANOBJS = *.o *.bak
|
166
|
-
|
167
|
-
all: $(DLLIB)
|
168
|
-
static: $(STATIC_LIB)
|
169
|
-
.PHONY: all install static install-so install-rb
|
170
|
-
.PHONY: clean clean-so clean-static clean-rb
|
171
|
-
|
172
|
-
clean-static::
|
173
|
-
clean-rb-default::
|
174
|
-
clean-rb::
|
175
|
-
clean-so::
|
176
|
-
clean: clean-so clean-static clean-rb-default clean-rb
|
177
|
-
-$(Q)$(RM_RF) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
|
178
|
-
|
179
|
-
distclean-rb-default::
|
180
|
-
distclean-rb::
|
181
|
-
distclean-so::
|
182
|
-
distclean-static::
|
183
|
-
distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
|
184
|
-
-$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
185
|
-
-$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
186
|
-
-$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
|
187
|
-
|
188
|
-
realclean: distclean
|
189
|
-
install: install-so install-rb
|
190
|
-
|
191
|
-
install-so: $(DLLIB) $(TIMESTAMP_DIR)/.sitearchdir.time
|
192
|
-
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
193
|
-
clean-static::
|
194
|
-
-$(Q)$(RM) $(STATIC_LIB)
|
195
|
-
install-rb: pre-install-rb do-install-rb install-rb-default
|
196
|
-
install-rb-default: pre-install-rb-default do-install-rb-default
|
197
|
-
pre-install-rb: Makefile
|
198
|
-
pre-install-rb-default: Makefile
|
199
|
-
do-install-rb:
|
200
|
-
do-install-rb-default:
|
201
|
-
pre-install-rb-default:
|
202
|
-
@$(NULLCMD)
|
203
|
-
$(TIMESTAMP_DIR)/.sitearchdir.time:
|
204
|
-
$(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
|
205
|
-
$(Q) $(TOUCH) $@
|
206
|
-
|
207
|
-
site-install: site-install-so site-install-rb
|
208
|
-
site-install-so: install-so
|
209
|
-
site-install-rb: install-rb
|
210
|
-
|
211
|
-
.SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
|
212
|
-
|
213
|
-
.cc.o:
|
214
|
-
$(ECHO) compiling $(<)
|
215
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
216
|
-
|
217
|
-
.cc.S:
|
218
|
-
$(ECHO) translating $(<)
|
219
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
220
|
-
|
221
|
-
.mm.o:
|
222
|
-
$(ECHO) compiling $(<)
|
223
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
224
|
-
|
225
|
-
.mm.S:
|
226
|
-
$(ECHO) translating $(<)
|
227
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
228
|
-
|
229
|
-
.cxx.o:
|
230
|
-
$(ECHO) compiling $(<)
|
231
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
232
|
-
|
233
|
-
.cxx.S:
|
234
|
-
$(ECHO) translating $(<)
|
235
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
236
|
-
|
237
|
-
.cpp.o:
|
238
|
-
$(ECHO) compiling $(<)
|
239
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
240
|
-
|
241
|
-
.cpp.S:
|
242
|
-
$(ECHO) translating $(<)
|
243
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
244
|
-
|
245
|
-
.c.o:
|
246
|
-
$(ECHO) compiling $(<)
|
247
|
-
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
248
|
-
|
249
|
-
.c.S:
|
250
|
-
$(ECHO) translating $(<)
|
251
|
-
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
252
|
-
|
253
|
-
.m.o:
|
254
|
-
$(ECHO) compiling $(<)
|
255
|
-
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
256
|
-
|
257
|
-
.m.S:
|
258
|
-
$(ECHO) translating $(<)
|
259
|
-
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
260
|
-
|
261
|
-
$(TARGET_SO): $(OBJS) Makefile
|
262
|
-
$(ECHO) linking shared-object $(DLLIB)
|
263
|
-
-$(Q)$(RM) $(@)
|
264
|
-
$(Q) $(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
$(OBJS): $(HDRS) $(ruby_headers)
|
data/ext/cld3/base.o
DELETED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/ext/cld3/feature_types.o
DELETED
Binary file
|
data/ext/cld3/fixunicodevalue.o
DELETED
Binary file
|
data/ext/cld3/fml_parser.o
DELETED
Binary file
|
Binary file
|
Binary file
|
data/ext/cld3/getonescriptspan.o
DELETED
Binary file
|
Binary file
|
Binary file
|
data/ext/cld3/libcld3.def
DELETED
data/ext/cld3/libcld3.so
DELETED
Binary file
|
Binary file
|
Binary file
|
data/ext/cld3/offsetmap.o
DELETED
Binary file
|
data/ext/cld3/registry.o
DELETED
Binary file
|
Binary file
|
data/ext/cld3/script_span/fixunicodevalue.h
DELETED
@@ -1,69 +0,0 @@
|
|
1
|
-
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
|
15
|
-
//
|
16
|
-
// Routine that maps a Unicode code point to an interchange-valid one
|
17
|
-
//
|
18
|
-
// Table that maps MS CP1252 bytes 00-FF to their corresponding Unicode
|
19
|
-
// code points. C0 and C1 control codes that are not interchange-valid
|
20
|
-
// are mapped to spaces.
|
21
|
-
|
22
|
-
|
23
|
-
#ifndef SCRIPT_SPAN_FIXUNICODEVALUE_H_
|
24
|
-
#define SCRIPT_SPAN_FIXUNICODEVALUE_H_
|
25
|
-
|
26
|
-
#include "integral_types.h" // for char32
|
27
|
-
#include "port.h"
|
28
|
-
|
29
|
-
namespace chrome_lang_id {
|
30
|
-
namespace CLD2 {
|
31
|
-
|
32
|
-
// Map byte value 0000-00FF to char32
|
33
|
-
// Maps C0 control codes (other than CR LF HT FF) to space [29 instances including DEL=0x7F]
|
34
|
-
// Maps C1 control codes to CP1252 [27 instances] or space [5 instances]
|
35
|
-
static const char32 kMapFullMicrosoft1252OrSpace[256] = {
|
36
|
-
0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, 0x20,0x09,0x0a,0x20, 0x0c,0x0d,0x20,0x20, // 00
|
37
|
-
0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20, 0x20,0x20,0x20,0x20,
|
38
|
-
0x20,0x21,0x22,0x23, 0x24,0x25,0x26,0x27, 0x28,0x29,0x2a,0x2b, 0x2c,0x2d,0x2e,0x2f,
|
39
|
-
0x30,0x31,0x32,0x33, 0x34,0x35,0x36,0x37, 0x38,0x39,0x3a,0x3b, 0x3c,0x3d,0x3e,0x3f,
|
40
|
-
|
41
|
-
0x40,0x41,0x42,0x43, 0x44,0x45,0x46,0x47, 0x48,0x49,0x4a,0x4b, 0x4c,0x4d,0x4e,0x4f, // 40
|
42
|
-
0x50,0x51,0x52,0x53, 0x54,0x55,0x56,0x57, 0x58,0x59,0x5a,0x5b, 0x5c,0x5d,0x5e,0x5f,
|
43
|
-
0x60,0x61,0x62,0x63, 0x64,0x65,0x66,0x67, 0x68,0x69,0x6a,0x6b, 0x6c,0x6d,0x6e,0x6f,
|
44
|
-
0x70,0x71,0x72,0x73, 0x74,0x75,0x76,0x77, 0x78,0x79,0x7a,0x7b, 0x7c,0x7d,0x7e,0x20,
|
45
|
-
|
46
|
-
0x20ac,0x20,0x201a,0x0192, 0x201e,0x2026,0x2020,0x2021, // 80
|
47
|
-
0x02c6,0x2030,0x0160,0x2039, 0x0152,0x20,0x017d,0x20,
|
48
|
-
0x20,0x2018,0x2019,0x201c, 0x201d,0x2022,0x2013,0x2014,
|
49
|
-
0x02dc,0x2122,0x0161,0x203a, 0x0153,0x20,0x017e,0x0178,
|
50
|
-
0xa0,0xa1,0xa2,0xa3, 0xa4,0xa5,0xa6,0xa7, 0xa8,0xa9,0xaa,0xab, 0xac,0xad,0xae,0xaf, // A0
|
51
|
-
0xb0,0xb1,0xb2,0xb3, 0xb4,0xb5,0xb6,0xb7, 0xb8,0xb9,0xba,0xbb, 0xbc,0xbd,0xbe,0xbf,
|
52
|
-
|
53
|
-
0xc0,0xc1,0xc2,0xc3, 0xc4,0xc5,0xc6,0xc7, 0xc8,0xc9,0xca,0xcb, 0xcc,0xcd,0xce,0xcf, // C0
|
54
|
-
0xd0,0xd1,0xd2,0xd3, 0xd4,0xd5,0xd6,0xd7, 0xd8,0xd9,0xda,0xdb, 0xdc,0xdd,0xde,0xdf,
|
55
|
-
0xe0,0xe1,0xe2,0xe3, 0xe4,0xe5,0xe6,0xe7, 0xe8,0xe9,0xea,0xeb, 0xec,0xed,0xee,0xef,
|
56
|
-
0xf0,0xf1,0xf2,0xf3, 0xf4,0xf5,0xf6,0xf7, 0xf8,0xf9,0xfa,0xfb, 0xfc,0xfd,0xfe,0xff,
|
57
|
-
};
|
58
|
-
|
59
|
-
// Guarantees that the resulting output value is interchange valid
|
60
|
-
// 00-FF; map to spaces or MS CP1252
|
61
|
-
// D800-DFFF; surrogates
|
62
|
-
// FDD0-FDEF; non-characters
|
63
|
-
// xxFFFE-xxFFFF; non-characters
|
64
|
-
char32 FixUnicodeValue(char32 uv);
|
65
|
-
|
66
|
-
} // End namespace CLD2
|
67
|
-
} // End namespace chrome_lang_id
|
68
|
-
|
69
|
-
#endif // SCRIPT_SPAN_FIXUNICODEVALUE_H_
|