cld3 3.4.4 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/README.md +4 -15
- data/cld3.gemspec +7 -7
- data/ext/cld3/cld_3/protos/feature_extractor.pb.h +100 -0
- data/ext/cld3/cld_3/protos/sentence.pb.h +35 -0
- data/ext/cld3/cld_3/protos/task_spec.pb.h +106 -0
- data/ext/cld3/extconf.rb +2 -12
- data/ext/cld3/getonescriptspan.cc +0 -2
- data/ext/cld3/nnet_language_identifier_c.cc +162 -70
- data/lib/cld3.rb +15 -100
- data/sig/cld3.rbs +2 -0
- metadata +20 -74
- data/ext/cld3/Makefile +0 -267
- data/ext/cld3/base.o +0 -0
- data/ext/cld3/embedding_feature_extractor.o +0 -0
- data/ext/cld3/embedding_network.o +0 -0
- data/ext/cld3/feature_extractor.o +0 -0
- data/ext/cld3/feature_extractor.pb.o +0 -0
- data/ext/cld3/feature_extractor.proto +0 -50
- data/ext/cld3/feature_types.o +0 -0
- data/ext/cld3/fixunicodevalue.o +0 -0
- data/ext/cld3/fml_parser.o +0 -0
- data/ext/cld3/generated_entities.o +0 -0
- data/ext/cld3/generated_ulscript.o +0 -0
- data/ext/cld3/getonescriptspan.o +0 -0
- data/ext/cld3/lang_id_nn_params.o +0 -0
- data/ext/cld3/language_identifier_features.o +0 -0
- data/ext/cld3/libcld3.def +0 -8
- data/ext/cld3/libcld3.so +0 -0
- data/ext/cld3/mkmf.log +0 -37
- data/ext/cld3/nnet_language_identifier.o +0 -0
- data/ext/cld3/nnet_language_identifier_c.o +0 -0
- data/ext/cld3/offsetmap.o +0 -0
- data/ext/cld3/registry.o +0 -0
- data/ext/cld3/relevant_script_feature.o +0 -0
- data/ext/cld3/sentence.pb.o +0 -0
- data/ext/cld3/sentence.proto +0 -77
- data/ext/cld3/sentence_features.o +0 -0
- data/ext/cld3/task_context.o +0 -0
- data/ext/cld3/task_context_params.o +0 -0
- data/ext/cld3/task_spec.pb.o +0 -0
- data/ext/cld3/task_spec.proto +0 -98
- data/ext/cld3/text_processing.o +0 -0
- data/ext/cld3/unicodetext.o +0 -0
- data/ext/cld3/utf8statetable.o +0 -0
- data/ext/cld3/utils.o +0 -0
- data/ext/cld3/workspace.o +0 -0
- data/lib/a.rb +0 -24
- data/lib/cld3/unstable.rb +0 -58
data/lib/cld3.rb
CHANGED
@@ -17,39 +17,10 @@
|
|
17
17
|
# limitations under the License.
|
18
18
|
# ==============================================================================
|
19
19
|
|
20
|
-
require "ffi"
|
21
|
-
require "rbconfig"
|
22
|
-
require "cld3/unstable"
|
23
|
-
|
24
20
|
# Module providing an interface for Compact Language Detector v3 (CLD3)
|
25
21
|
module CLD3
|
26
22
|
# Class for detecting the language of a document.
|
27
23
|
class NNetLanguageIdentifier
|
28
|
-
# Min number of bytes needed to make a prediction if the constructor is
|
29
|
-
# called without the corresponding parameter.
|
30
|
-
# This is Numeric object.
|
31
|
-
MIN_NUM_BYTES_TO_CONSIDER = 140
|
32
|
-
|
33
|
-
# Max number of bytes needed to make a prediction if the constructor is
|
34
|
-
# called without the corresponding parameter.
|
35
|
-
# This is Numeric object.
|
36
|
-
MAX_NUM_BYTES_TO_CONSIDER = 700
|
37
|
-
|
38
|
-
# Max number of input bytes to process.
|
39
|
-
# This is Numeric object.
|
40
|
-
MAX_NUM_INPUT_BYTES_TO_CONSIDER = 10000
|
41
|
-
|
42
|
-
# Predictions with probability greater than or equal to this threshold are
|
43
|
-
# marked as reliable. This threshold was optimized on a set of text segments
|
44
|
-
# extracted from wikipedia, and results in an overall precision, recall,
|
45
|
-
# and f1 equal to 0.9760, 0.9624, and 0.9692, respectively.
|
46
|
-
# This is Numeric object.
|
47
|
-
RELIABILITY_THRESHOLD = 0.7
|
48
|
-
|
49
|
-
# Reliability threshold for the languages hr and bs.
|
50
|
-
# This is Numeric object.
|
51
|
-
RELIABILITY_HR_BS_THRESHOLD = 0.5
|
52
|
-
|
53
24
|
# Holds probability that Span, specified by start/end indices, is a given
|
54
25
|
# language. The language is not stored here; it can be found in Result, which
|
55
26
|
# holds an Array of SpanInfo.
|
@@ -76,8 +47,10 @@ module CLD3
|
|
76
47
|
|
77
48
|
# The arguments are two Numeric objects.
|
78
49
|
def initialize(min_num_bytes = MIN_NUM_BYTES_TO_CONSIDER, max_num_bytes = MAX_NUM_BYTES_TO_CONSIDER)
|
79
|
-
|
80
|
-
|
50
|
+
min_num_bytes = min_num_bytes.ceil
|
51
|
+
max_num_bytes = max_num_bytes.floor
|
52
|
+
raise ArgumentError if min_num_bytes < 0 || min_num_bytes >= max_num_bytes
|
53
|
+
@cc = Unstable.make(min_num_bytes, max_num_bytes)
|
81
54
|
end
|
82
55
|
|
83
56
|
# Finds the most likely language for the given text, along with additional
|
@@ -88,21 +61,7 @@ module CLD3
|
|
88
61
|
# The argument is a String object.
|
89
62
|
# The returned value of this function is an instance of Result.
|
90
63
|
def find_language(text)
|
91
|
-
|
92
|
-
pointer = FFI::MemoryPointer.new(:char, text_utf8.bytesize)
|
93
|
-
|
94
|
-
begin
|
95
|
-
pointer.put_bytes(0, text_utf8)
|
96
|
-
|
97
|
-
result = Unstable.NNetLanguageIdentifier_find_language(@cc, pointer, text_utf8.bytesize)
|
98
|
-
begin
|
99
|
-
convert_result Unstable::NNetLanguageIdentifier::Result.new(result)
|
100
|
-
ensure
|
101
|
-
Unstable.delete_result result
|
102
|
-
end
|
103
|
-
ensure
|
104
|
-
pointer.free
|
105
|
-
end
|
64
|
+
@cc.find_language(Result, SpanInfo, text.encode(Encoding::UTF_8))
|
106
65
|
end
|
107
66
|
|
108
67
|
# Splits the input text (up to the first byte, if any, that is not
|
@@ -119,51 +78,15 @@ module CLD3
|
|
119
78
|
# The second argument is a Numeric object.
|
120
79
|
# The returned value of this function is an Array of Result instances.
|
121
80
|
def find_top_n_most_freq_langs(text, num_langs)
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
pointer = FFI::MemoryPointer.new(:char, text_utf8.bytesize)
|
126
|
-
|
127
|
-
begin
|
128
|
-
pointer.put_bytes(0, text_utf8)
|
129
|
-
|
130
|
-
results = Unstable.NNetLanguageIdentifier_find_top_n_most_freq_langs(@cc, pointer, text_utf8.bytesize, num_langs)
|
131
|
-
begin
|
132
|
-
a = num_langs.times
|
133
|
-
.lazy
|
134
|
-
.map { |index| convert_result Unstable.refer_to_nth_result(results, index) }
|
135
|
-
.take_while { |result| !result.nil? }
|
136
|
-
.to_a
|
137
|
-
|
138
|
-
a
|
139
|
-
ensure
|
140
|
-
Unstable.delete_results results
|
141
|
-
end
|
142
|
-
ensure
|
143
|
-
pointer.free
|
144
|
-
end
|
81
|
+
@cc.find_top_n_most_freq_langs(Result, SpanInfo,
|
82
|
+
text.encode(Encoding::UTF_8),
|
83
|
+
num_langs)
|
145
84
|
end
|
146
85
|
|
147
|
-
|
148
|
-
|
149
|
-
def convert_result(result)
|
150
|
-
language = result[:language_data].read_bytes(result[:language_size])
|
151
|
-
return nil if language == "und"
|
152
|
-
|
153
|
-
cursor = result[:byte_ranges_data]
|
154
|
-
byte_ranges = result[:byte_ranges_size].times.map do
|
155
|
-
info = Unstable::NNetLanguageIdentifier::SpanInfo.new(cursor)
|
156
|
-
cursor += Unstable::NNetLanguageIdentifier::SpanInfo.size
|
157
|
-
SpanInfo.new(info[:start_index], info[:end_index], info[:probability])
|
158
|
-
end
|
159
|
-
|
160
|
-
Result.new(
|
161
|
-
language.to_sym,
|
162
|
-
result[:probability],
|
163
|
-
result[:reliable?],
|
164
|
-
result[:proportion],
|
165
|
-
byte_ranges)
|
86
|
+
class Unstable
|
166
87
|
end
|
88
|
+
|
89
|
+
private_constant :Unstable
|
167
90
|
end
|
168
91
|
|
169
92
|
# Encapsulates the TaskContext specifying only the parameters for the model.
|
@@ -171,17 +94,9 @@ module CLD3
|
|
171
94
|
module TaskContextParams
|
172
95
|
# This is a frozen Array object containing symbols.
|
173
96
|
# @type const LANGUAGE_NAMES: untyped
|
174
|
-
LANGUAGE_NAMES = [
|
175
|
-
:eo, :co, :eu, :ta, :de, :mt, :ps, :te, :su, :uz, :'zh-Latn', :ne,
|
176
|
-
:nl, :sw, :sq, :hmn, :ja, :no, :mn, :so, :ko, :kk, :sl, :ig,
|
177
|
-
:mr, :th, :zu, :ml, :hr, :bs, :lo, :sd, :cy, :hy, :uk, :pt,
|
178
|
-
:lv, :iw, :cs, :vi, :jv, :be, :km, :mk, :tr, :fy, :am, :zh,
|
179
|
-
:da, :sv, :fi, :ht, :af, :la, :id, :fil, :sm, :ca, :el, :ka,
|
180
|
-
:sr, :it, :sk, :ru, :'ru-Latn', :bg, :ny, :fa, :haw, :gl, :et,
|
181
|
-
:ms, :gd, :'bg-Latn', :ha, :is, :ur, :mi, :hi, :bn, :'hi-Latn', :fr,
|
182
|
-
:yi, :hu, :xh, :my, :tg, :ro, :ar, :lb, :'el-Latn', :st, :ceb,
|
183
|
-
:kn, :az, :si, :ky, :mg, :en, :gu, :es, :pl, :'ja-Latn', :ga, :lt,
|
184
|
-
:sn, :yo, :pa, :ku,
|
185
|
-
].freeze
|
97
|
+
LANGUAGE_NAMES = []
|
186
98
|
end
|
187
99
|
end
|
100
|
+
|
101
|
+
require "cld3_ext"
|
102
|
+
CLD3::TaskContextParams::LANGUAGE_NAMES.freeze
|
data/sig/cld3.rbs
CHANGED
metadata
CHANGED
@@ -1,95 +1,75 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cld3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.4.4
|
4
|
+
version: 3.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Akihiko Odaki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: ffi
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: 1.1.0
|
20
|
-
- - "<"
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: 1.16.0
|
23
|
-
type: :runtime
|
24
|
-
prerelease: false
|
25
|
-
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
requirements:
|
27
|
-
- - ">="
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: 1.1.0
|
30
|
-
- - "<"
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: 1.16.0
|
33
13
|
- !ruby/object:Gem::Dependency
|
34
14
|
name: rbs
|
35
15
|
requirement: !ruby/object:Gem::Requirement
|
36
16
|
requirements:
|
37
17
|
- - ">="
|
38
18
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
19
|
+
version: 2.8.0
|
40
20
|
- - "<"
|
41
21
|
- !ruby/object:Gem::Version
|
42
|
-
version:
|
22
|
+
version: 2.9.0
|
43
23
|
type: :development
|
44
24
|
prerelease: false
|
45
25
|
version_requirements: !ruby/object:Gem::Requirement
|
46
26
|
requirements:
|
47
27
|
- - ">="
|
48
28
|
- !ruby/object:Gem::Version
|
49
|
-
version:
|
29
|
+
version: 2.8.0
|
50
30
|
- - "<"
|
51
31
|
- !ruby/object:Gem::Version
|
52
|
-
version:
|
32
|
+
version: 2.9.0
|
53
33
|
- !ruby/object:Gem::Dependency
|
54
34
|
name: rspec
|
55
35
|
requirement: !ruby/object:Gem::Requirement
|
56
36
|
requirements:
|
57
37
|
- - ">="
|
58
38
|
- !ruby/object:Gem::Version
|
59
|
-
version: 3.
|
39
|
+
version: 3.12.0
|
60
40
|
- - "<"
|
61
41
|
- !ruby/object:Gem::Version
|
62
|
-
version: 3.
|
42
|
+
version: 3.13.0
|
63
43
|
type: :development
|
64
44
|
prerelease: false
|
65
45
|
version_requirements: !ruby/object:Gem::Requirement
|
66
46
|
requirements:
|
67
47
|
- - ">="
|
68
48
|
- !ruby/object:Gem::Version
|
69
|
-
version: 3.
|
49
|
+
version: 3.12.0
|
70
50
|
- - "<"
|
71
51
|
- !ruby/object:Gem::Version
|
72
|
-
version: 3.
|
52
|
+
version: 3.13.0
|
73
53
|
- !ruby/object:Gem::Dependency
|
74
54
|
name: steep
|
75
55
|
requirement: !ruby/object:Gem::Requirement
|
76
56
|
requirements:
|
77
57
|
- - ">="
|
78
58
|
- !ruby/object:Gem::Version
|
79
|
-
version:
|
59
|
+
version: 1.3.0
|
80
60
|
- - "<"
|
81
61
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
62
|
+
version: 1.4.0
|
83
63
|
type: :development
|
84
64
|
prerelease: false
|
85
65
|
version_requirements: !ruby/object:Gem::Requirement
|
86
66
|
requirements:
|
87
67
|
- - ">="
|
88
68
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
69
|
+
version: 1.3.0
|
90
70
|
- - "<"
|
91
71
|
- !ruby/object:Gem::Version
|
92
|
-
version:
|
72
|
+
version: 1.4.0
|
93
73
|
description: Compact Language Detector v3 (CLD3) is a neural network model for language
|
94
74
|
identification.
|
95
75
|
email: akihiko.odaki@gmail.com
|
@@ -103,105 +83,71 @@ files:
|
|
103
83
|
- LICENSE_CLD3
|
104
84
|
- README.md
|
105
85
|
- cld3.gemspec
|
106
|
-
- ext/cld3/Makefile
|
107
86
|
- ext/cld3/base.cc
|
108
87
|
- ext/cld3/base.h
|
109
|
-
- ext/cld3/base.o
|
110
88
|
- ext/cld3/casts.h
|
89
|
+
- ext/cld3/cld_3/protos/feature_extractor.pb.h
|
90
|
+
- ext/cld3/cld_3/protos/sentence.pb.h
|
91
|
+
- ext/cld3/cld_3/protos/task_spec.pb.h
|
111
92
|
- ext/cld3/embedding_feature_extractor.cc
|
112
93
|
- ext/cld3/embedding_feature_extractor.h
|
113
|
-
- ext/cld3/embedding_feature_extractor.o
|
114
94
|
- ext/cld3/embedding_network.cc
|
115
95
|
- ext/cld3/embedding_network.h
|
116
|
-
- ext/cld3/embedding_network.o
|
117
96
|
- ext/cld3/embedding_network_params.h
|
118
97
|
- ext/cld3/extconf.rb
|
119
98
|
- ext/cld3/feature_extractor.cc
|
120
99
|
- ext/cld3/feature_extractor.h
|
121
|
-
- ext/cld3/feature_extractor.o
|
122
|
-
- ext/cld3/feature_extractor.pb.o
|
123
|
-
- ext/cld3/feature_extractor.proto
|
124
100
|
- ext/cld3/feature_types.cc
|
125
101
|
- ext/cld3/feature_types.h
|
126
|
-
- ext/cld3/feature_types.o
|
127
102
|
- ext/cld3/fixunicodevalue.cc
|
128
103
|
- ext/cld3/fixunicodevalue.h
|
129
|
-
- ext/cld3/fixunicodevalue.o
|
130
104
|
- ext/cld3/float16.h
|
131
105
|
- ext/cld3/fml_parser.cc
|
132
106
|
- ext/cld3/fml_parser.h
|
133
|
-
- ext/cld3/fml_parser.o
|
134
107
|
- ext/cld3/generated_entities.cc
|
135
|
-
- ext/cld3/generated_entities.o
|
136
108
|
- ext/cld3/generated_ulscript.cc
|
137
109
|
- ext/cld3/generated_ulscript.h
|
138
|
-
- ext/cld3/generated_ulscript.o
|
139
110
|
- ext/cld3/getonescriptspan.cc
|
140
111
|
- ext/cld3/getonescriptspan.h
|
141
|
-
- ext/cld3/getonescriptspan.o
|
142
112
|
- ext/cld3/integral_types.h
|
143
113
|
- ext/cld3/lang_id_nn_params.cc
|
144
114
|
- ext/cld3/lang_id_nn_params.h
|
145
|
-
- ext/cld3/lang_id_nn_params.o
|
146
115
|
- ext/cld3/language_identifier_features.cc
|
147
116
|
- ext/cld3/language_identifier_features.h
|
148
|
-
- ext/cld3/language_identifier_features.o
|
149
|
-
- ext/cld3/libcld3.def
|
150
|
-
- ext/cld3/libcld3.so
|
151
|
-
- ext/cld3/mkmf.log
|
152
117
|
- ext/cld3/nnet_language_identifier.cc
|
153
118
|
- ext/cld3/nnet_language_identifier.h
|
154
|
-
- ext/cld3/nnet_language_identifier.o
|
155
119
|
- ext/cld3/nnet_language_identifier_c.cc
|
156
|
-
- ext/cld3/nnet_language_identifier_c.o
|
157
120
|
- ext/cld3/offsetmap.cc
|
158
121
|
- ext/cld3/offsetmap.h
|
159
|
-
- ext/cld3/offsetmap.o
|
160
122
|
- ext/cld3/port.h
|
161
123
|
- ext/cld3/registry.cc
|
162
124
|
- ext/cld3/registry.h
|
163
|
-
- ext/cld3/registry.o
|
164
125
|
- ext/cld3/relevant_script_feature.cc
|
165
126
|
- ext/cld3/relevant_script_feature.h
|
166
|
-
- ext/cld3/relevant_script_feature.o
|
167
127
|
- ext/cld3/script_detector.h
|
168
|
-
- ext/cld3/sentence.pb.o
|
169
|
-
- ext/cld3/sentence.proto
|
170
128
|
- ext/cld3/sentence_features.cc
|
171
129
|
- ext/cld3/sentence_features.h
|
172
|
-
- ext/cld3/sentence_features.o
|
173
130
|
- ext/cld3/simple_adder.h
|
174
131
|
- ext/cld3/stringpiece.h
|
175
132
|
- ext/cld3/task_context.cc
|
176
133
|
- ext/cld3/task_context.h
|
177
|
-
- ext/cld3/task_context.o
|
178
134
|
- ext/cld3/task_context_params.cc
|
179
135
|
- ext/cld3/task_context_params.h
|
180
|
-
- ext/cld3/task_context_params.o
|
181
|
-
- ext/cld3/task_spec.pb.o
|
182
|
-
- ext/cld3/task_spec.proto
|
183
136
|
- ext/cld3/text_processing.cc
|
184
137
|
- ext/cld3/text_processing.h
|
185
|
-
- ext/cld3/text_processing.o
|
186
138
|
- ext/cld3/unicodetext.cc
|
187
139
|
- ext/cld3/unicodetext.h
|
188
|
-
- ext/cld3/unicodetext.o
|
189
140
|
- ext/cld3/utf8acceptinterchange.h
|
190
141
|
- ext/cld3/utf8prop_lettermarkscriptnum.h
|
191
142
|
- ext/cld3/utf8repl_lettermarklower.h
|
192
143
|
- ext/cld3/utf8scannot_lettermarkspecial.h
|
193
144
|
- ext/cld3/utf8statetable.cc
|
194
145
|
- ext/cld3/utf8statetable.h
|
195
|
-
- ext/cld3/utf8statetable.o
|
196
146
|
- ext/cld3/utils.cc
|
197
147
|
- ext/cld3/utils.h
|
198
|
-
- ext/cld3/utils.o
|
199
148
|
- ext/cld3/workspace.cc
|
200
149
|
- ext/cld3/workspace.h
|
201
|
-
- ext/cld3/workspace.o
|
202
|
-
- lib/a.rb
|
203
150
|
- lib/cld3.rb
|
204
|
-
- lib/cld3/unstable.rb
|
205
151
|
- sig/cld3.rbs
|
206
152
|
homepage: https://github.com/akihikodaki/cld3-ruby
|
207
153
|
licenses:
|
@@ -215,17 +161,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
215
161
|
requirements:
|
216
162
|
- - ">="
|
217
163
|
- !ruby/object:Gem::Version
|
218
|
-
version: 2.
|
164
|
+
version: 2.7.0
|
219
165
|
- - "<"
|
220
166
|
- !ruby/object:Gem::Version
|
221
|
-
version: 3.
|
167
|
+
version: 3.3.0
|
222
168
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
223
169
|
requirements:
|
224
170
|
- - ">="
|
225
171
|
- !ruby/object:Gem::Version
|
226
172
|
version: '0'
|
227
173
|
requirements: []
|
228
|
-
rubygems_version: 3.
|
174
|
+
rubygems_version: 3.3.25
|
229
175
|
signing_key:
|
230
176
|
specification_version: 4
|
231
177
|
summary: Compact Language Detector v3 (CLD3)
|
data/ext/cld3/Makefile
DELETED
@@ -1,267 +0,0 @@
|
|
1
|
-
|
2
|
-
SHELL = /bin/sh
|
3
|
-
|
4
|
-
# V=0 quiet, V=1 verbose. other values don't work.
|
5
|
-
V = 1
|
6
|
-
Q1 = $(V:1=)
|
7
|
-
Q = $(Q1:0=@)
|
8
|
-
ECHO1 = $(V:1=@ :)
|
9
|
-
ECHO = $(ECHO1:0=@ echo)
|
10
|
-
NULLCMD = :
|
11
|
-
|
12
|
-
#### Start of system configuration section. ####
|
13
|
-
|
14
|
-
srcdir = .
|
15
|
-
topdir = /usr/include
|
16
|
-
hdrdir = $(topdir)
|
17
|
-
arch_hdrdir = /usr/include
|
18
|
-
PATH_SEPARATOR = :
|
19
|
-
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
20
|
-
prefix = $(DESTDIR)/usr
|
21
|
-
rubysitearchprefix = $(sitearchlibdir)/$(RUBY_BASE_NAME)
|
22
|
-
rubyarchprefix = $(DESTDIR)/usr/lib64/ruby
|
23
|
-
rubylibprefix = $(exec_prefix)/share/ruby
|
24
|
-
exec_prefix = $(DESTDIR)/usr
|
25
|
-
vendorarchhdrdir = $(vendorhdrdir)/$(arch)
|
26
|
-
sitearchhdrdir = $(sitehdrdir)/$(arch)
|
27
|
-
rubyarchhdrdir = $(DESTDIR)/usr/include
|
28
|
-
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
|
29
|
-
sitehdrdir = $(rubyhdrdir)/site_ruby
|
30
|
-
rubyhdrdir = $(DESTDIR)/usr/include
|
31
|
-
rubygemsdir = $(DESTDIR)/usr/share/rubygems
|
32
|
-
vendorarchdir = $(DESTDIR)/usr/lib64/ruby/vendor_ruby
|
33
|
-
vendorlibdir = $(vendordir)
|
34
|
-
vendordir = $(DESTDIR)/usr/share/ruby/vendor_ruby
|
35
|
-
sitearchdir = $(DESTDIR)/usr/local/lib64/ruby/site_ruby
|
36
|
-
sitelibdir = $(sitedir)
|
37
|
-
sitedir = $(DESTDIR)/usr/local/share/ruby/site_ruby
|
38
|
-
rubyarchdir = $(rubyarchprefix)
|
39
|
-
rubylibdir = $(rubylibprefix)
|
40
|
-
sitearchincludedir = $(includedir)/$(sitearch)
|
41
|
-
archincludedir = $(includedir)/$(arch)
|
42
|
-
sitearchlibdir = $(libdir)/$(sitearch)
|
43
|
-
archlibdir = $(DESTDIR)/usr/lib64
|
44
|
-
ridir = $(datarootdir)/$(RI_BASE_NAME)
|
45
|
-
mandir = $(DESTDIR)/usr/share/man
|
46
|
-
localedir = $(datarootdir)/locale
|
47
|
-
libdir = $(exec_prefix)/lib64
|
48
|
-
psdir = $(docdir)
|
49
|
-
pdfdir = $(docdir)
|
50
|
-
dvidir = $(docdir)
|
51
|
-
htmldir = $(docdir)
|
52
|
-
infodir = $(DESTDIR)/usr/share/info
|
53
|
-
docdir = $(datarootdir)/doc/$(PACKAGE)
|
54
|
-
oldincludedir = $(DESTDIR)/usr/include
|
55
|
-
includedir = $(DESTDIR)/usr/include
|
56
|
-
runstatedir = $(localstatedir)/run
|
57
|
-
localstatedir = $(DESTDIR)/var
|
58
|
-
sharedstatedir = $(DESTDIR)/var/lib
|
59
|
-
sysconfdir = $(DESTDIR)/etc
|
60
|
-
datadir = $(DESTDIR)/usr/share
|
61
|
-
datarootdir = $(prefix)/share
|
62
|
-
libexecdir = $(DESTDIR)/usr/libexec
|
63
|
-
sbindir = $(DESTDIR)/usr/sbin
|
64
|
-
bindir = $(exec_prefix)/bin
|
65
|
-
archdir = $(rubyarchdir)
|
66
|
-
|
67
|
-
|
68
|
-
CC_WRAPPER =
|
69
|
-
CC = gcc
|
70
|
-
CXX = g++
|
71
|
-
LIBRUBY = $(LIBRUBY_SO)
|
72
|
-
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
73
|
-
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
74
|
-
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static $(MAINLIBS)
|
75
|
-
empty =
|
76
|
-
OUTFLAG = -o $(empty)
|
77
|
-
COUTFLAG = -o $(empty)
|
78
|
-
CSRCFLAG = $(empty)
|
79
|
-
|
80
|
-
RUBY_EXTCONF_H =
|
81
|
-
cflags = $(optflags) $(debugflags) $(warnflags)
|
82
|
-
cxxflags =
|
83
|
-
optflags = -O3
|
84
|
-
debugflags = -ggdb3
|
85
|
-
warnflags = -Wall -Wextra -Wdeprecated-declarations -Wduplicated-cond -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wwrite-strings -Wimplicit-fallthrough=0 -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-packed-bitfield-compat -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wsuggest-attribute=format -Wsuggest-attribute=noreturn -Wunused-variable
|
86
|
-
cppflags =
|
87
|
-
CCDLFLAGS = -fPIC
|
88
|
-
CFLAGS = $(CCDLFLAGS) -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fPIC $(ARCH_FLAG)
|
89
|
-
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
|
90
|
-
DEFS =
|
91
|
-
CPPFLAGS = $(DEFS) $(cppflags)
|
92
|
-
CXXFLAGS = $(CCDLFLAGS) -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fvisibility=hidden -std=c++11 $(ARCH_FLAG)
|
93
|
-
ldflags = -L. -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld -fstack-protector-strong -rdynamic -Wl,-export-dynamic
|
94
|
-
dldflags = -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld
|
95
|
-
ARCH_FLAG =
|
96
|
-
DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
|
97
|
-
LDSHARED = $(CC) -shared
|
98
|
-
LDSHAREDXX = $(CXX) -shared
|
99
|
-
AR = gcc-ar
|
100
|
-
EXEEXT =
|
101
|
-
|
102
|
-
RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
|
103
|
-
RUBY_SO_NAME = ruby
|
104
|
-
RUBYW_INSTALL_NAME =
|
105
|
-
RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version_dir_name)
|
106
|
-
RUBYW_BASE_NAME = rubyw
|
107
|
-
RUBY_BASE_NAME = ruby
|
108
|
-
|
109
|
-
arch = aarch64-linux
|
110
|
-
sitearch = $(arch)
|
111
|
-
ruby_version = 3.0.0
|
112
|
-
ruby = $(bindir)/$(RUBY_BASE_NAME)
|
113
|
-
RUBY = $(ruby)
|
114
|
-
ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
|
115
|
-
|
116
|
-
RM = rm -f
|
117
|
-
RM_RF = $(RUBY) -run -e rm -- -rf
|
118
|
-
RMDIRS = rmdir --ignore-fail-on-non-empty -p
|
119
|
-
MAKEDIRS = /usr/bin/mkdir -p
|
120
|
-
INSTALL = /usr/bin/install -c
|
121
|
-
INSTALL_PROG = $(INSTALL) -m 0755
|
122
|
-
INSTALL_DATA = $(INSTALL) -m 644
|
123
|
-
COPY = cp
|
124
|
-
TOUCH = exit >
|
125
|
-
|
126
|
-
#### End of system configuration section. ####
|
127
|
-
|
128
|
-
preload =
|
129
|
-
libpath = . $(archlibdir)
|
130
|
-
LIBPATH = -L. -L$(archlibdir)
|
131
|
-
DEFFILE =
|
132
|
-
|
133
|
-
CLEANFILES = mkmf.log
|
134
|
-
DISTCLEANFILES =
|
135
|
-
DISTCLEANDIRS =
|
136
|
-
|
137
|
-
extout =
|
138
|
-
extout_prefix =
|
139
|
-
target_prefix =
|
140
|
-
LOCAL_LIBS =
|
141
|
-
LIBS = -lprotobuf -lpthread -lm -lc
|
142
|
-
ORIG_SRCS = base.cc embedding_feature_extractor.cc embedding_network.cc feature_extractor.cc feature_extractor.pb.cc feature_types.cc fixunicodevalue.cc fml_parser.cc generated_entities.cc generated_ulscript.cc getonescriptspan.cc lang_id_nn_params.cc language_identifier_features.cc nnet_language_identifier.cc nnet_language_identifier_c.cc offsetmap.cc registry.cc relevant_script_feature.cc sentence.pb.cc sentence_features.cc task_context.cc task_context_params.cc task_spec.pb.cc text_processing.cc unicodetext.cc utf8statetable.cc utils.cc workspace.cc
|
143
|
-
SRCS = $(ORIG_SRCS)
|
144
|
-
OBJS = base.o embedding_feature_extractor.o embedding_network.o feature_extractor.o feature_extractor.pb.o feature_types.o fixunicodevalue.o fml_parser.o generated_entities.o generated_ulscript.o getonescriptspan.o lang_id_nn_params.o language_identifier_features.o nnet_language_identifier.o nnet_language_identifier_c.o offsetmap.o registry.o relevant_script_feature.o sentence.pb.o sentence_features.o task_context.o task_context_params.o task_spec.pb.o text_processing.o unicodetext.o utf8statetable.o utils.o workspace.o
|
145
|
-
HDRS = $(srcdir)/base.h $(srcdir)/casts.h $(srcdir)/embedding_feature_extractor.h $(srcdir)/embedding_network.h $(srcdir)/embedding_network_params.h $(srcdir)/feature_extractor.h $(srcdir)/feature_extractor.pb.h $(srcdir)/feature_types.h $(srcdir)/fixunicodevalue.h $(srcdir)/float16.h $(srcdir)/fml_parser.h $(srcdir)/generated_ulscript.h $(srcdir)/getonescriptspan.h $(srcdir)/integral_types.h $(srcdir)/lang_id_nn_params.h $(srcdir)/language_identifier_features.h $(srcdir)/nnet_language_identifier.h $(srcdir)/offsetmap.h $(srcdir)/port.h $(srcdir)/registry.h $(srcdir)/relevant_script_feature.h $(srcdir)/script_detector.h $(srcdir)/sentence.pb.h $(srcdir)/sentence_features.h $(srcdir)/simple_adder.h $(srcdir)/stringpiece.h $(srcdir)/task_context.h $(srcdir)/task_context_params.h $(srcdir)/task_spec.pb.h $(srcdir)/text_processing.h $(srcdir)/unicodetext.h $(srcdir)/utf8acceptinterchange.h $(srcdir)/utf8prop_lettermarkscriptnum.h $(srcdir)/utf8repl_lettermarklower.h $(srcdir)/utf8scannot_lettermarkspecial.h $(srcdir)/utf8statetable.h $(srcdir)/utils.h $(srcdir)/workspace.h
|
146
|
-
LOCAL_HDRS =
|
147
|
-
TARGET = libcld3
|
148
|
-
TARGET_NAME = libcld3
|
149
|
-
TARGET_ENTRY = Init_$(TARGET_NAME)
|
150
|
-
DLLIB = $(TARGET).so
|
151
|
-
EXTSTATIC =
|
152
|
-
STATIC_LIB =
|
153
|
-
|
154
|
-
TIMESTAMP_DIR = .
|
155
|
-
BINDIR = $(bindir)
|
156
|
-
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
157
|
-
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
158
|
-
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
159
|
-
HDRDIR = $(sitehdrdir)$(target_prefix)
|
160
|
-
ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
|
161
|
-
TARGET_SO_DIR =
|
162
|
-
TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
|
163
|
-
CLEANLIBS = $(TARGET_SO)
|
164
|
-
CLEANOBJS = *.o *.bak
|
165
|
-
|
166
|
-
all: $(DLLIB)
|
167
|
-
static: $(STATIC_LIB)
|
168
|
-
.PHONY: all install static install-so install-rb
|
169
|
-
.PHONY: clean clean-so clean-static clean-rb
|
170
|
-
|
171
|
-
clean-static::
|
172
|
-
clean-rb-default::
|
173
|
-
clean-rb::
|
174
|
-
clean-so::
|
175
|
-
clean: clean-so clean-static clean-rb-default clean-rb
|
176
|
-
-$(Q)$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
|
177
|
-
|
178
|
-
distclean-rb-default::
|
179
|
-
distclean-rb::
|
180
|
-
distclean-so::
|
181
|
-
distclean-static::
|
182
|
-
distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
|
183
|
-
-$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
184
|
-
-$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
185
|
-
-$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
|
186
|
-
|
187
|
-
realclean: distclean
|
188
|
-
install: install-so install-rb
|
189
|
-
|
190
|
-
install-so: $(DLLIB) $(TIMESTAMP_DIR)/.sitearchdir.time
|
191
|
-
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
192
|
-
clean-static::
|
193
|
-
-$(Q)$(RM) $(STATIC_LIB)
|
194
|
-
install-rb: pre-install-rb do-install-rb install-rb-default
|
195
|
-
install-rb-default: pre-install-rb-default do-install-rb-default
|
196
|
-
pre-install-rb: Makefile
|
197
|
-
pre-install-rb-default: Makefile
|
198
|
-
do-install-rb:
|
199
|
-
do-install-rb-default:
|
200
|
-
pre-install-rb-default:
|
201
|
-
@$(NULLCMD)
|
202
|
-
$(TIMESTAMP_DIR)/.sitearchdir.time:
|
203
|
-
$(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
|
204
|
-
$(Q) $(TOUCH) $@
|
205
|
-
|
206
|
-
site-install: site-install-so site-install-rb
|
207
|
-
site-install-so: install-so
|
208
|
-
site-install-rb: install-rb
|
209
|
-
|
210
|
-
.SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
|
211
|
-
|
212
|
-
.cc.o:
|
213
|
-
$(ECHO) compiling $(<)
|
214
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
215
|
-
|
216
|
-
.cc.S:
|
217
|
-
$(ECHO) translating $(<)
|
218
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
219
|
-
|
220
|
-
.mm.o:
|
221
|
-
$(ECHO) compiling $(<)
|
222
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
223
|
-
|
224
|
-
.mm.S:
|
225
|
-
$(ECHO) translating $(<)
|
226
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
227
|
-
|
228
|
-
.cxx.o:
|
229
|
-
$(ECHO) compiling $(<)
|
230
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
231
|
-
|
232
|
-
.cxx.S:
|
233
|
-
$(ECHO) translating $(<)
|
234
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
235
|
-
|
236
|
-
.cpp.o:
|
237
|
-
$(ECHO) compiling $(<)
|
238
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
239
|
-
|
240
|
-
.cpp.S:
|
241
|
-
$(ECHO) translating $(<)
|
242
|
-
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
243
|
-
|
244
|
-
.c.o:
|
245
|
-
$(ECHO) compiling $(<)
|
246
|
-
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
247
|
-
|
248
|
-
.c.S:
|
249
|
-
$(ECHO) translating $(<)
|
250
|
-
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
251
|
-
|
252
|
-
.m.o:
|
253
|
-
$(ECHO) compiling $(<)
|
254
|
-
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
|
255
|
-
|
256
|
-
.m.S:
|
257
|
-
$(ECHO) translating $(<)
|
258
|
-
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
|
259
|
-
|
260
|
-
$(TARGET_SO): $(OBJS) Makefile
|
261
|
-
$(ECHO) linking shared-object $(DLLIB)
|
262
|
-
-$(Q)$(RM) $(@)
|
263
|
-
$(Q) $(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
$(OBJS): $(HDRS) $(ruby_headers)
|
data/ext/cld3/base.o
DELETED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|