cld3 3.2.5 → 3.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/{ext/cld3/ext/LICENSE → LICENSE_CLD3} +0 -0
- data/cld3.gemspec +1 -1
- data/ext/cld3/Makefile +266 -0
- data/ext/cld3/{ext/src/base.cc → base.cc} +0 -0
- data/ext/cld3/{ext/src/base.h → base.h} +0 -0
- data/ext/cld3/base.o +0 -0
- data/ext/cld3/{ext/src/casts.h → casts.h} +0 -0
- data/ext/cld3/{ext/src/embedding_feature_extractor.cc → embedding_feature_extractor.cc} +0 -0
- data/ext/cld3/{ext/src/embedding_feature_extractor.h → embedding_feature_extractor.h} +0 -0
- data/ext/cld3/embedding_feature_extractor.o +0 -0
- data/ext/cld3/{ext/src/embedding_network.cc → embedding_network.cc} +0 -0
- data/ext/cld3/{ext/src/embedding_network.h → embedding_network.h} +0 -0
- data/ext/cld3/embedding_network.o +0 -0
- data/ext/cld3/{ext/src/embedding_network_params.h → embedding_network_params.h} +0 -0
- data/ext/cld3/{ext/src/feature_extractor.cc → feature_extractor.cc} +0 -0
- data/ext/cld3/{ext/src/feature_extractor.h → feature_extractor.h} +0 -0
- data/ext/cld3/feature_extractor.o +0 -0
- data/ext/cld3/feature_extractor.pb.o +0 -0
- data/ext/cld3/{ext/src/feature_extractor.proto → feature_extractor.proto} +0 -0
- data/ext/cld3/{ext/src/feature_types.cc → feature_types.cc} +0 -0
- data/ext/cld3/{ext/src/feature_types.h → feature_types.h} +0 -0
- data/ext/cld3/feature_types.o +0 -0
- data/ext/cld3/{ext/src/script_span/fixunicodevalue.cc → fixunicodevalue.cc} +0 -0
- data/ext/cld3/{ext/src/script_span/fixunicodevalue.h → fixunicodevalue.h} +0 -0
- data/ext/cld3/fixunicodevalue.o +0 -0
- data/ext/cld3/{ext/src/float16.h → float16.h} +0 -0
- data/ext/cld3/{ext/src/fml_parser.cc → fml_parser.cc} +0 -0
- data/ext/cld3/{ext/src/fml_parser.h → fml_parser.h} +0 -0
- data/ext/cld3/fml_parser.o +0 -0
- data/ext/cld3/{ext/src/script_span/generated_entities.cc → generated_entities.cc} +0 -0
- data/ext/cld3/generated_entities.o +0 -0
- data/ext/cld3/{ext/src/script_span/generated_ulscript.cc → generated_ulscript.cc} +0 -0
- data/ext/cld3/{ext/src/script_span/generated_ulscript.h → generated_ulscript.h} +0 -0
- data/ext/cld3/generated_ulscript.o +0 -0
- data/ext/cld3/{ext/src/script_span/getonescriptspan.cc → getonescriptspan.cc} +0 -0
- data/ext/cld3/{ext/src/script_span/getonescriptspan.h → getonescriptspan.h} +0 -0
- data/ext/cld3/getonescriptspan.o +0 -0
- data/ext/cld3/{ext/src/script_span/integral_types.h → integral_types.h} +0 -0
- data/ext/cld3/{ext/src/lang_id_nn_params.cc → lang_id_nn_params.cc} +0 -0
- data/ext/cld3/{ext/src/lang_id_nn_params.h → lang_id_nn_params.h} +0 -0
- data/ext/cld3/lang_id_nn_params.o +0 -0
- data/ext/cld3/{ext/src/language_identifier_features.cc → language_identifier_features.cc} +0 -0
- data/ext/cld3/{ext/src/language_identifier_features.h → language_identifier_features.h} +0 -0
- data/ext/cld3/language_identifier_features.o +0 -0
- data/ext/cld3/libcld3.so +0 -0
- data/ext/cld3/mkmf.log +36 -0
- data/ext/cld3/{ext/src/nnet_language_identifier.cc → nnet_language_identifier.cc} +0 -0
- data/ext/cld3/{ext/src/nnet_language_identifier.h → nnet_language_identifier.h} +0 -0
- data/ext/cld3/nnet_language_identifier.o +0 -0
- data/ext/cld3/nnet_language_identifier_c.o +0 -0
- data/ext/cld3/{ext/src/script_span/offsetmap.cc → offsetmap.cc} +0 -0
- data/ext/cld3/{ext/src/script_span/offsetmap.h → offsetmap.h} +0 -0
- data/ext/cld3/offsetmap.o +0 -0
- data/ext/cld3/{ext/src/script_span/port.h → port.h} +0 -0
- data/ext/cld3/{ext/src/registry.cc → registry.cc} +0 -0
- data/ext/cld3/{ext/src/registry.h → registry.h} +0 -0
- data/ext/cld3/registry.o +0 -0
- data/ext/cld3/{ext/src/relevant_script_feature.cc → relevant_script_feature.cc} +0 -0
- data/ext/cld3/{ext/src/relevant_script_feature.h → relevant_script_feature.h} +0 -0
- data/ext/cld3/relevant_script_feature.o +0 -0
- data/ext/cld3/{ext/src/script_detector.h → script_detector.h} +0 -0
- data/ext/cld3/sentence.pb.o +0 -0
- data/ext/cld3/{ext/src/sentence.proto → sentence.proto} +0 -0
- data/ext/cld3/{ext/src/sentence_features.cc → sentence_features.cc} +0 -0
- data/ext/cld3/{ext/src/sentence_features.h → sentence_features.h} +0 -0
- data/ext/cld3/sentence_features.o +0 -0
- data/ext/cld3/{ext/src/simple_adder.h → simple_adder.h} +0 -0
- data/ext/cld3/{ext/src/script_span/stringpiece.h → stringpiece.h} +0 -0
- data/ext/cld3/{ext/src/task_context.cc → task_context.cc} +0 -0
- data/ext/cld3/{ext/src/task_context.h → task_context.h} +0 -0
- data/ext/cld3/task_context.o +0 -0
- data/ext/cld3/{ext/src/task_context_params.cc → task_context_params.cc} +0 -0
- data/ext/cld3/{ext/src/task_context_params.h → task_context_params.h} +0 -0
- data/ext/cld3/task_context_params.o +0 -0
- data/ext/cld3/task_spec.pb.o +0 -0
- data/ext/cld3/{ext/src/task_spec.proto → task_spec.proto} +0 -0
- data/ext/cld3/{ext/src/script_span/text_processing.cc → text_processing.cc} +0 -0
- data/ext/cld3/{ext/src/script_span/text_processing.h → text_processing.h} +0 -0
- data/ext/cld3/text_processing.o +0 -0
- data/ext/cld3/{ext/src/unicodetext.cc → unicodetext.cc} +0 -0
- data/ext/cld3/{ext/src/unicodetext.h → unicodetext.h} +0 -0
- data/ext/cld3/unicodetext.o +0 -0
- data/ext/cld3/{ext/src/script_span/utf8acceptinterchange.h → utf8acceptinterchange.h} +0 -0
- data/ext/cld3/{ext/src/script_span/utf8prop_lettermarkscriptnum.h → utf8prop_lettermarkscriptnum.h} +0 -0
- data/ext/cld3/{ext/src/script_span/utf8repl_lettermarklower.h → utf8repl_lettermarklower.h} +0 -0
- data/ext/cld3/{ext/src/script_span/utf8scannot_lettermarkspecial.h → utf8scannot_lettermarkspecial.h} +0 -0
- data/ext/cld3/{ext/src/script_span/utf8statetable.cc → utf8statetable.cc} +0 -0
- data/ext/cld3/{ext/src/script_span/utf8statetable.h → utf8statetable.h} +0 -0
- data/ext/cld3/utf8statetable.o +0 -0
- data/ext/cld3/{ext/src/utils.cc → utils.cc} +0 -0
- data/ext/cld3/{ext/src/utils.h → utils.h} +0 -0
- data/ext/cld3/utils.o +0 -0
- data/ext/cld3/{ext/src/workspace.cc → workspace.cc} +0 -0
- data/ext/cld3/{ext/src/workspace.h → workspace.h} +0 -0
- data/ext/cld3/workspace.o +0 -0
- metadata +96 -81
- data/ext/cld3/ext/CMakeLists.txt +0 -69
- data/ext/cld3/ext/CONTRIBUTING.md +0 -26
- data/ext/cld3/ext/README.md +0 -73
- data/ext/cld3/ext/misc/myprotobuf.cmake +0 -58
- data/ext/cld3/ext/model.png +0 -0
- data/ext/cld3/ext/src/BUILD.gn +0 -133
- data/ext/cld3/ext/src/DEPS +0 -4
- data/ext/cld3/ext/src/language_identifier_features_test.cc +0 -261
- data/ext/cld3/ext/src/language_identifier_main.cc +0 -54
- data/ext/cld3/ext/src/nnet_lang_id_test.cc +0 -254
- data/ext/cld3/ext/src/nnet_lang_id_test_data.cc +0 -529
- data/ext/cld3/ext/src/nnet_lang_id_test_data.h +0 -117
- data/ext/cld3/ext/src/relevant_script_feature_test.cc +0 -259
- data/ext/cld3/ext/src/script_detector_test.cc +0 -161
- data/ext/cld3/ext/src/script_span/README.md +0 -11
- data/ext/cld3/ext/src/script_span/getonescriptspan_test.cc +0 -135
@@ -1,254 +0,0 @@
|
|
1
|
-
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
-
|
3
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
you may not use this file except in compliance with the License.
|
5
|
-
You may obtain a copy of the License at
|
6
|
-
|
7
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
|
9
|
-
Unless required by applicable law or agreed to in writing, software
|
10
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
See the License for the specific language governing permissions and
|
13
|
-
limitations under the License.
|
14
|
-
==============================================================================*/
|
15
|
-
|
16
|
-
#include <cmath>
|
17
|
-
#include <iostream>
|
18
|
-
#include <string>
|
19
|
-
#include <utility>
|
20
|
-
#include <vector>
|
21
|
-
|
22
|
-
#include "base.h"
|
23
|
-
#include "nnet_lang_id_test_data.h"
|
24
|
-
#include "nnet_language_identifier.h"
|
25
|
-
|
26
|
-
namespace chrome_lang_id {
|
27
|
-
namespace nnet_lang_id_test {
|
28
|
-
|
29
|
-
// Tests the model on all supported languages. Returns "true" if the test is
|
30
|
-
// successful and "false" otherwise.
|
31
|
-
// TODO(abakalov): Add a test for random input that should be labeled as
|
32
|
-
// "unknown" due to low confidence.
|
33
|
-
bool TestPredictions() {
|
34
|
-
std::cout << "Running " << __FUNCTION__ << std::endl;
|
35
|
-
|
36
|
-
// (gold language, sample text) pairs used for testing.
|
37
|
-
const std::vector<std::pair<std::string, std::string>> gold_lang_text = {
|
38
|
-
{"af", NNetLangIdTestData::kTestStrAF},
|
39
|
-
{"ar", NNetLangIdTestData::kTestStrAR},
|
40
|
-
{"az", NNetLangIdTestData::kTestStrAZ},
|
41
|
-
{"be", NNetLangIdTestData::kTestStrBE},
|
42
|
-
{"bg", NNetLangIdTestData::kTestStrBG},
|
43
|
-
{"bn", NNetLangIdTestData::kTestStrBN},
|
44
|
-
{"bs", NNetLangIdTestData::kTestStrBS},
|
45
|
-
{"ca", NNetLangIdTestData::kTestStrCA},
|
46
|
-
{"ceb", NNetLangIdTestData::kTestStrCEB},
|
47
|
-
{"cs", NNetLangIdTestData::kTestStrCS},
|
48
|
-
{"cy", NNetLangIdTestData::kTestStrCY},
|
49
|
-
{"da", NNetLangIdTestData::kTestStrDA},
|
50
|
-
{"de", NNetLangIdTestData::kTestStrDE},
|
51
|
-
{"el", NNetLangIdTestData::kTestStrEL},
|
52
|
-
{"en", NNetLangIdTestData::kTestStrEN},
|
53
|
-
{"eo", NNetLangIdTestData::kTestStrEO},
|
54
|
-
{"es", NNetLangIdTestData::kTestStrES},
|
55
|
-
{"et", NNetLangIdTestData::kTestStrET},
|
56
|
-
{"eu", NNetLangIdTestData::kTestStrEU},
|
57
|
-
{"fa", NNetLangIdTestData::kTestStrFA},
|
58
|
-
{"fi", NNetLangIdTestData::kTestStrFI},
|
59
|
-
{"fil", NNetLangIdTestData::kTestStrFIL},
|
60
|
-
{"fr", NNetLangIdTestData::kTestStrFR},
|
61
|
-
{"ga", NNetLangIdTestData::kTestStrGA},
|
62
|
-
{"gl", NNetLangIdTestData::kTestStrGL},
|
63
|
-
{"gu", NNetLangIdTestData::kTestStrGU},
|
64
|
-
{"ha", NNetLangIdTestData::kTestStrHA},
|
65
|
-
{"hi", NNetLangIdTestData::kTestStrHI},
|
66
|
-
{"hmn", NNetLangIdTestData::kTestStrHMN},
|
67
|
-
{"hr", NNetLangIdTestData::kTestStrHR},
|
68
|
-
{"ht", NNetLangIdTestData::kTestStrHT},
|
69
|
-
{"hu", NNetLangIdTestData::kTestStrHU},
|
70
|
-
{"hy", NNetLangIdTestData::kTestStrHY},
|
71
|
-
{"id", NNetLangIdTestData::kTestStrID},
|
72
|
-
{"ig", NNetLangIdTestData::kTestStrIG},
|
73
|
-
{"is", NNetLangIdTestData::kTestStrIS},
|
74
|
-
{"it", NNetLangIdTestData::kTestStrIT},
|
75
|
-
{"iw", NNetLangIdTestData::kTestStrIW},
|
76
|
-
{"ja", NNetLangIdTestData::kTestStrJA},
|
77
|
-
{"jv", NNetLangIdTestData::kTestStrJV},
|
78
|
-
{"ka", NNetLangIdTestData::kTestStrKA},
|
79
|
-
{"kk", NNetLangIdTestData::kTestStrKK},
|
80
|
-
{"km", NNetLangIdTestData::kTestStrKM},
|
81
|
-
{"kn", NNetLangIdTestData::kTestStrKN},
|
82
|
-
{"ko", NNetLangIdTestData::kTestStrKO},
|
83
|
-
{"la", NNetLangIdTestData::kTestStrLA},
|
84
|
-
{"lo", NNetLangIdTestData::kTestStrLO},
|
85
|
-
{"lt", NNetLangIdTestData::kTestStrLT},
|
86
|
-
{"lv", NNetLangIdTestData::kTestStrLV},
|
87
|
-
{"mg", NNetLangIdTestData::kTestStrMG},
|
88
|
-
{"mi", NNetLangIdTestData::kTestStrMI},
|
89
|
-
{"mk", NNetLangIdTestData::kTestStrMK},
|
90
|
-
{"ml", NNetLangIdTestData::kTestStrML},
|
91
|
-
{"mn", NNetLangIdTestData::kTestStrMN},
|
92
|
-
{"mr", NNetLangIdTestData::kTestStrMR},
|
93
|
-
{"ms", NNetLangIdTestData::kTestStrMS},
|
94
|
-
{"mt", NNetLangIdTestData::kTestStrMT},
|
95
|
-
{"my", NNetLangIdTestData::kTestStrMY},
|
96
|
-
{"ne", NNetLangIdTestData::kTestStrNE},
|
97
|
-
{"nl", NNetLangIdTestData::kTestStrNL},
|
98
|
-
{"no", NNetLangIdTestData::kTestStrNO},
|
99
|
-
{"ny", NNetLangIdTestData::kTestStrNY},
|
100
|
-
{"pa", NNetLangIdTestData::kTestStrPA},
|
101
|
-
{"pl", NNetLangIdTestData::kTestStrPL},
|
102
|
-
{"pt", NNetLangIdTestData::kTestStrPT},
|
103
|
-
{"ro", NNetLangIdTestData::kTestStrRO},
|
104
|
-
{"ru", NNetLangIdTestData::kTestStrRU},
|
105
|
-
{"si", NNetLangIdTestData::kTestStrSI},
|
106
|
-
{"sk", NNetLangIdTestData::kTestStrSK},
|
107
|
-
{"sl", NNetLangIdTestData::kTestStrSL},
|
108
|
-
{"so", NNetLangIdTestData::kTestStrSO},
|
109
|
-
{"sq", NNetLangIdTestData::kTestStrSQ},
|
110
|
-
{"sr", NNetLangIdTestData::kTestStrSR},
|
111
|
-
{"st", NNetLangIdTestData::kTestStrST},
|
112
|
-
{"su", NNetLangIdTestData::kTestStrSU},
|
113
|
-
{"sv", NNetLangIdTestData::kTestStrSV},
|
114
|
-
{"sw", NNetLangIdTestData::kTestStrSW},
|
115
|
-
{"ta", NNetLangIdTestData::kTestStrTA},
|
116
|
-
{"te", NNetLangIdTestData::kTestStrTE},
|
117
|
-
{"tg", NNetLangIdTestData::kTestStrTG},
|
118
|
-
{"th", NNetLangIdTestData::kTestStrTH},
|
119
|
-
{"tr", NNetLangIdTestData::kTestStrTR},
|
120
|
-
{"uk", NNetLangIdTestData::kTestStrUK},
|
121
|
-
{"ur", NNetLangIdTestData::kTestStrUR},
|
122
|
-
{"uz", NNetLangIdTestData::kTestStrUZ},
|
123
|
-
{"vi", NNetLangIdTestData::kTestStrVI},
|
124
|
-
{"yi", NNetLangIdTestData::kTestStrYI},
|
125
|
-
{"yo", NNetLangIdTestData::kTestStrYO},
|
126
|
-
{"zh", NNetLangIdTestData::kTestStrZH},
|
127
|
-
{"zu", NNetLangIdTestData::kTestStrZU}};
|
128
|
-
|
129
|
-
NNetLanguageIdentifier lang_id(/*min_num_bytes=*/0,
|
130
|
-
/*max_num_bytes=*/1000);
|
131
|
-
|
132
|
-
// Iterate over all the test instances, make predictions and check that they
|
133
|
-
// are correct.
|
134
|
-
int num_wrong = 0;
|
135
|
-
for (const auto &test_instance : gold_lang_text) {
|
136
|
-
const std::string &expected_lang = test_instance.first;
|
137
|
-
const std::string &text = test_instance.second;
|
138
|
-
|
139
|
-
const NNetLanguageIdentifier::Result result = lang_id.FindLanguage(text);
|
140
|
-
if (result.language != expected_lang) {
|
141
|
-
++num_wrong;
|
142
|
-
std::cout << " Misclassification: " << std::endl;
|
143
|
-
std::cout << " Text: " << text << std::endl;
|
144
|
-
std::cout << " Expected language: " << expected_lang << std::endl;
|
145
|
-
std::cout << " Predicted language: " << result.language << std::endl;
|
146
|
-
}
|
147
|
-
}
|
148
|
-
|
149
|
-
if (num_wrong == 0) {
|
150
|
-
std::cout << " Success!" << std::endl;
|
151
|
-
return true;
|
152
|
-
} else {
|
153
|
-
std::cout << " Failure: " << num_wrong << " wrong predictions"
|
154
|
-
<< std::endl;
|
155
|
-
return false;
|
156
|
-
}
|
157
|
-
}
|
158
|
-
|
159
|
-
// Tests the model on input containing multiple languages of different scripts.
|
160
|
-
// Returns "true" if the test is successful and "false" otherwise.
|
161
|
-
bool TestMultipleLanguagesInInput() {
|
162
|
-
std::cout << "Running " << __FUNCTION__ << std::endl;
|
163
|
-
|
164
|
-
// Text containing snippets in English and Bulgarian.
|
165
|
-
const std::string text =
|
166
|
-
"This piece of text is in English. Този текст е на Български.";
|
167
|
-
|
168
|
-
// Expected language spans in the input text, corresponding respectively to
|
169
|
-
// Bulgarian and English.
|
170
|
-
const std::string expected_bg_span = " Този текст е на Български ";
|
171
|
-
const std::string expected_en_span = " This piece of text is in English ";
|
172
|
-
const float expected_byte_sum =
|
173
|
-
static_cast<float>(expected_bg_span.size() + expected_en_span.size());
|
174
|
-
|
175
|
-
// Number of languages to query for and the expected byte proportions.
|
176
|
-
const int num_queried_langs = 3;
|
177
|
-
const std::unordered_map<string, float> expected_lang_proportions{
|
178
|
-
{"bg", expected_bg_span.size() / expected_byte_sum},
|
179
|
-
{"en", expected_en_span.size() / expected_byte_sum},
|
180
|
-
{NNetLanguageIdentifier::kUnknown, 0.0}};
|
181
|
-
|
182
|
-
NNetLanguageIdentifier lang_id(/*min_num_bytes=*/0,
|
183
|
-
/*max_num_bytes=*/1000);
|
184
|
-
const std::vector<NNetLanguageIdentifier::Result> results =
|
185
|
-
lang_id.FindTopNMostFreqLangs(text, num_queried_langs);
|
186
|
-
|
187
|
-
if (results.size() != expected_lang_proportions.size()) {
|
188
|
-
std::cout << " Failure" << std::endl;
|
189
|
-
std::cout << " Wrong number of languages: expected "
|
190
|
-
<< expected_lang_proportions.size() << ", obtained "
|
191
|
-
<< results.size() << std::endl;
|
192
|
-
return false;
|
193
|
-
}
|
194
|
-
|
195
|
-
// Iterate over the results and check that the correct proportions are
|
196
|
-
// returned for the expected languages.
|
197
|
-
const float epsilon = 0.00001f;
|
198
|
-
for (const NNetLanguageIdentifier::Result &result : results) {
|
199
|
-
if (expected_lang_proportions.count(result.language) == 0) {
|
200
|
-
std::cout << " Failure" << std::endl;
|
201
|
-
std::cout << " Incorrect language: " << result.language << std::endl;
|
202
|
-
return false;
|
203
|
-
}
|
204
|
-
if (std::abs(result.proportion -
|
205
|
-
expected_lang_proportions.at(result.language)) > epsilon) {
|
206
|
-
std::cout << " Failure" << std::endl;
|
207
|
-
std::cout << " Language " << result.language << ": expected proportion "
|
208
|
-
<< expected_lang_proportions.at(result.language) << ", got "
|
209
|
-
<< result.proportion << std::endl;
|
210
|
-
return false;
|
211
|
-
}
|
212
|
-
|
213
|
-
// Skip over undefined language.
|
214
|
-
if (result.language == "und")
|
215
|
-
continue;
|
216
|
-
if (result.byte_ranges.size() != 1) {
|
217
|
-
std::cout << " Should only detect one span containing " << result.language
|
218
|
-
<< std::endl;
|
219
|
-
return false;
|
220
|
-
}
|
221
|
-
// Check that specified byte ranges for language are correct.
|
222
|
-
int start_index = result.byte_ranges[0].start_index;
|
223
|
-
int end_index = result.byte_ranges[0].end_index;
|
224
|
-
std::string byte_ranges_text = text.substr(start_index, end_index - start_index);
|
225
|
-
if (result.language == "bg") {
|
226
|
-
if (byte_ranges_text.compare("Този текст е на Български.") != 0) {
|
227
|
-
std::cout << " Incorrect byte ranges returned for Bulgarian " << std::endl;
|
228
|
-
return false;
|
229
|
-
}
|
230
|
-
} else if (result.language == "en") {
|
231
|
-
if (byte_ranges_text.compare("This piece of text is in English. ") != 0) {
|
232
|
-
std::cout << " Incorrect byte ranges returned for English " << std::endl;
|
233
|
-
return false;
|
234
|
-
}
|
235
|
-
} else {
|
236
|
-
std::cout << " Got language other than English or Bulgarian "
|
237
|
-
<< std::endl;
|
238
|
-
return false;
|
239
|
-
}
|
240
|
-
}
|
241
|
-
std::cout << " Success!" << std::endl;
|
242
|
-
return true;
|
243
|
-
}
|
244
|
-
|
245
|
-
} // namespace nnet_lang_id_test
|
246
|
-
} // namespace chrome_lang_id
|
247
|
-
|
248
|
-
// Runs tests for the language identification model.
|
249
|
-
int main(int argc, char **argv) {
|
250
|
-
const bool tests_successful =
|
251
|
-
chrome_lang_id::nnet_lang_id_test::TestPredictions() &&
|
252
|
-
chrome_lang_id::nnet_lang_id_test::TestMultipleLanguagesInInput();
|
253
|
-
return tests_successful ? 0 : 1;
|
254
|
-
}
|
@@ -1,529 +0,0 @@
|
|
1
|
-
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
-
|
3
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
you may not use this file except in compliance with the License.
|
5
|
-
You may obtain a copy of the License at
|
6
|
-
|
7
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
|
9
|
-
Unless required by applicable law or agreed to in writing, software
|
10
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
See the License for the specific language governing permissions and
|
13
|
-
limitations under the License.
|
14
|
-
==============================================================================*/
|
15
|
-
|
16
|
-
#include "nnet_lang_id_test_data.h"
|
17
|
-
|
18
|
-
namespace chrome_lang_id {
|
19
|
-
|
20
|
-
const char *const NNetLangIdTestData::kTestStrAF =
|
21
|
-
"Dit is 'n kort stukkie van die teks wat gebruik sal word vir die toets "
|
22
|
-
"van die akkuraatheid van die nuwe benadering.";
|
23
|
-
|
24
|
-
const char *const NNetLangIdTestData::kTestStrAR = "احتيالية بيع أي حساب";
|
25
|
-
|
26
|
-
const char *const NNetLangIdTestData::kTestStrAZ =
|
27
|
-
" a az qalıb breyn rinq intellektual oyunu üzrə yarışın zona mərhələləri "
|
28
|
-
"keçirilib miq un qalıqlarının dənizdən çıxarılması davam edir məhəmməd "
|
29
|
-
"peyğəmbərin karikaturalarını çap edən qəzetin baş redaktoru iş otağında "
|
30
|
-
"ölüb";
|
31
|
-
|
32
|
-
const char *const NNetLangIdTestData::kTestStrBE =
|
33
|
-
" а друкаваць іх не было тэхнічна магчыма бліжэй за вільню тым самым часам "
|
34
|
-
"нямецкае кіраўніцтва прапаноўвала апроч ўвядзення лацінкі яе";
|
35
|
-
|
36
|
-
const char *const NNetLangIdTestData::kTestStrBG =
|
37
|
-
" а дума попада в състояние на изпитание ключовите думи с предсказана "
|
38
|
-
"малко под то изискване на страниците за търсене в";
|
39
|
-
|
40
|
-
const char *const NNetLangIdTestData::kTestStrBN =
|
41
|
-
"গ্যালারির ৩৮ বছর পূর্তিতে মূল্যছাড় অর্থনীতি বিএনপির ওয়াক আউট তপন"
|
42
|
-
" চৌধুরী হারবাল অ্যাসোসিয়েশনের সভাপতি আন্তর্জাতিক পরামর্শক "
|
43
|
-
"বোর্ড দিয়ে শরিয়াহ্ ইনন্ডেক্স করবে "
|
44
|
-
"সিএসই মালিকপক্ষের কান্না, শ্রমিকের অনিশ্চয়তা মতিঝিলে সমাবেশ নিষিদ্ধ: "
|
45
|
-
"এফবিসিসিআইয়ের ধন্যবাদ বিনোদন বিশেষ প্রতিবেদন বাংলালিংকের গ্র্যান্ডমাস্টার "
|
46
|
-
"সিজন-৩ ব্রাজিলে বিশ্বকাপ ফুটবল আয়োজনবিরোধী বিক্ষোভ দেশের নিরাপত্তার"
|
47
|
-
" চেয়ে অনেক বেশি সচেতন । প্রার্থীদের দক্ষতা ও যোগ্যতার"
|
48
|
-
" পাশাপাশি তারা জাতীয় ইস্যুগুলোতে প্রাধান্য দিয়েছেন । ” পাঁচটি সিটিতে ২০"
|
49
|
-
" লাখ ভোটারদের দিয়ে জাতীয় নির্বাচনে ৮ কোটি ভোটারদের"
|
50
|
-
" সঙ্গে তুলনা করা যাবে কি একজন দর্শকের এমন প্রশ্নে জবাবে আব্দুল্লাহ "
|
51
|
-
"আল নোমান বলেন , “ এই পাঁচটি সিটি কর্পোরেশন নির্বাচন দেশের পাঁচটি বড়"
|
52
|
-
" বিভাগের প্রতিনিধিত্ব করছে । এছাড়া এখানকার ভোটার রা সবাই সচেতন । তারা";
|
53
|
-
|
54
|
-
const char *const NNetLangIdTestData::kTestStrBS =
|
55
|
-
"Novi predsjednik Mešihata Islamske zajednice u Srbiji (IZuS) i muftija "
|
56
|
-
"dr. Mevlud ef. Dudić izjavio je u intervjuu za Anadolu Agency (AA) kako "
|
57
|
-
"je uvjeren da će doći do vraćanja jedinstva među muslimanima i unutar "
|
58
|
-
"Islamske zajednice na prostoru Sandžaka, te da je njegova ruka pružena za "
|
59
|
-
"povratak svih u okrilje Islamske zajednice u Srbiji nakon skoro sedam "
|
60
|
-
"godina podjela u tom dijelu Srbije. Dudić je za predsjednika Mešihata IZ "
|
61
|
-
"u Srbiji izabran 4. januara, a zvanična inauguracija će biti obavljena u "
|
62
|
-
"prvoj polovini februara. Kako se očekuje, prisustvovat će joj i "
|
63
|
-
"reisu-l-ulema Islamske zajednice u Srbiji Husein ef. Kavazović koji će i "
|
64
|
-
"zvanično promovirati Dudića u novog prvog čovjeka IZ u Srbiji. Dudić će "
|
65
|
-
"danas boraviti u prvoj zvaničnoj posjeti reisu Kavazoviću, što je njegov "
|
66
|
-
"privi simbolični potez nakon imenovanja. ";
|
67
|
-
|
68
|
-
const char *const NNetLangIdTestData::kTestStrCA =
|
69
|
-
"al final en un únic lloc nhorabona l correu electrònic està concebut com "
|
70
|
-
"a eina de productivitat aleshores per què perdre el temps arxivant "
|
71
|
-
"missatges per després intentar recordar on els veu desar i per què heu d "
|
72
|
-
"eliminar missatges importants per l";
|
73
|
-
|
74
|
-
const char *const NNetLangIdTestData::kTestStrCEB =
|
75
|
-
"Ang Sugbo usa sa mga labing ugmad nga lalawigan sa nasod. Kini ang sentro "
|
76
|
-
"sa komersyo, edukasyon ug industriya sa sentral ug habagatang dapit sa "
|
77
|
-
"kapupod-an. Ang mipadayag sa Sugbo isip ikapito nga labing nindot nga "
|
78
|
-
"pulo sa , ang nag-inusarang pulo sa Pilipinas nga napasidunggan sa maong "
|
79
|
-
"magasin sukad pa sa tuig";
|
80
|
-
|
81
|
-
const char *const NNetLangIdTestData::kTestStrCS =
|
82
|
-
" a akci opakujte film uložen vykreslit gmail tokio smazat obsah adresáře "
|
83
|
-
"nelze načíst systémový profil jednotky smoot okud používáte pro určení "
|
84
|
-
"polokoule značky z západ nebo v východ používejte nezáporné hodnoty "
|
85
|
-
"zeměpisné délky nelze";
|
86
|
-
|
87
|
-
const char *const NNetLangIdTestData::kTestStrCY =
|
88
|
-
" a chofrestru eich cyfrif ymwelwch a unwaith i chi greu eich cyfrif mi "
|
89
|
-
"fydd yn cael ei hysbysu o ch cyfeiriad ebost newydd fel eich bod yn gallu "
|
90
|
-
"cadw mewn cysylltiad drwy gmail os nad ydych chi wedi clywed yn barod am "
|
91
|
-
"gmail mae n gwasanaeth gwebost";
|
92
|
-
|
93
|
-
const char *const NNetLangIdTestData::kTestStrDA =
|
94
|
-
" a z tallene og punktummer der er tilladte log ud angiv den ønskede "
|
95
|
-
"adgangskode igen november gem personlige oplysninger kontrolspørgsmål det "
|
96
|
-
"sidste tegn i dit brugernavn skal være et bogstav a z eller tal skriv de "
|
97
|
-
"tegn du kan se i billedet nedenfor";
|
98
|
-
|
99
|
-
const char *const NNetLangIdTestData::kTestStrDE =
|
100
|
-
" abschnitt ordner aktivieren werden die ordnereinstellungen im "
|
101
|
-
"farbabschnitt deaktiviert öchten sie wirklich fortfahren eldtypen angeben "
|
102
|
-
"optional n diesem schritt geben sie für jedesfeld aus dem datenset den "
|
103
|
-
"typ an ieser schritt ist optional eldtypen";
|
104
|
-
|
105
|
-
const char *const NNetLangIdTestData::kTestStrEL =
|
106
|
-
" ή αρνητική αναζήτηση λέξης κλειδιού καταστήστε τις μεμονωμένες λέξεις "
|
107
|
-
"κλειδιά περισσότερο στοχοθετημένες με τη μετατροπή τους σε";
|
108
|
-
|
109
|
-
const char *const NNetLangIdTestData::kTestStrEN =
|
110
|
-
" a backup credit card by visiting your billing preferences page or visit "
|
111
|
-
"the adwords help centre for more details https adwords google com support "
|
112
|
-
"bin answer py answer hl en we were unable to process the payment of for "
|
113
|
-
"your outstanding google adwords";
|
114
|
-
|
115
|
-
const char *const NNetLangIdTestData::kTestStrEO =
|
116
|
-
" a jarcento refoje per enmetado de koncerna pastro tiam de reformita "
|
117
|
-
"konfesio ekde refoje ekzistis luteranaj komunumanoj tamen tiuj fondis "
|
118
|
-
"propran komunumon nur en ambaŭ apartenis ekde al la evangela eklezio en "
|
119
|
-
"prusio resp ties rejnlanda provinceklezio en";
|
120
|
-
|
121
|
-
const char *const NNetLangIdTestData::kTestStrES =
|
122
|
-
" a continuación haz clic en el botón obtener ruta también puedes "
|
123
|
-
"desplazarte hasta el final de la página para cambiar tus opciones de "
|
124
|
-
"búsqueda gráfico y detalles ésta es una lista de los vídeos que te "
|
125
|
-
"recomendamos nuestras recomendaciones se basan";
|
126
|
-
|
127
|
-
const char *const NNetLangIdTestData::kTestStrET =
|
128
|
-
" a niipea kui sinu maksimaalne igakuine krediidi limiit on meie poolt "
|
129
|
-
"heaks kiidetud on sinu kohustuseks see krediidilimiit";
|
130
|
-
|
131
|
-
const char *const NNetLangIdTestData::kTestStrEU =
|
132
|
-
" a den eraso bat honen kontra hortaz eragiketa bakarrik behar dituen "
|
133
|
-
"eraso batek aes apurtuko luke nahiz eta oraingoz eraso bideraezina izan "
|
134
|
-
"gaur egungo teknologiaren mugak direla eta oraingoz kezka hauek alde "
|
135
|
-
"batera utzi daitezke orain arteko indar";
|
136
|
-
|
137
|
-
const char *const NNetLangIdTestData::kTestStrFA =
|
138
|
-
" آب خوردن عجله می کردند به جای باز ی کتک کاری می کردند و همه چيز مثل قبل "
|
139
|
-
"بود فقط من ماندم و يک دنيا حرف و انتظار تا عاقبت رسيد احضاريه ی ای با";
|
140
|
-
|
141
|
-
const char *const NNetLangIdTestData::kTestStrFI =
|
142
|
-
" a joilla olet käynyt tämä kerro meille kuka ä olet ei tunnistettavia "
|
143
|
-
"käyttötietoja kuten virheraportteja käytetään google desktopin "
|
144
|
-
"parantamiseen etsi näyttää mukautettuja uutisia google desktop "
|
145
|
-
"keskivaihto leikkaa voit kaksoisnapsauttaa";
|
146
|
-
|
147
|
-
const char *const NNetLangIdTestData::kTestStrFIL =
|
148
|
-
"Ito ay isang maikling piraso ng teksto na ito ay gagamitin para sa "
|
149
|
-
"pagsubok ang kawastuhan ng mga bagong diskarte.";
|
150
|
-
|
151
|
-
const char *const NNetLangIdTestData::kTestStrFR =
|
152
|
-
" a accès aux collections et aux frontaux qui lui ont été attribués il "
|
153
|
-
"peut consulter et modifier ses collections et exporter des configurations "
|
154
|
-
"de collection toutefois il ne peut pas créer ni supprimer des collections "
|
155
|
-
"enfin il a accès aux fonctions";
|
156
|
-
|
157
|
-
const char *const NNetLangIdTestData::kTestStrGA =
|
158
|
-
" a bhfuil na focail go léir i do cheist le fáil orthu ní gá ach focail "
|
159
|
-
"breise a chur leis na cinn a cuardaíodh cheana chun an cuardach a "
|
160
|
-
"bheachtú nó a chúngú má chuirtear focal breise isteach aimseofar fo aicme "
|
161
|
-
"ar leith de na torthaí a fuarthas";
|
162
|
-
|
163
|
-
const char *const NNetLangIdTestData::kTestStrGL =
|
164
|
-
" debe ser como mínimo taranto tendas de venda polo miúdo cociñas "
|
165
|
-
"servizos bordado canadá viaxes parques de vehículos de recreo hotel "
|
166
|
-
"oriental habitación recibir unha postal no enderezo indicado "
|
167
|
-
"anteriormente";
|
168
|
-
|
169
|
-
const char *const NNetLangIdTestData::kTestStrGU =
|
170
|
-
" આના પરિણામ પ્રમાણસર ફોન્ટ અવતરણ ચિન્હવાળા પાઠને છુપાવો બધા સમૂહો શોધાયા"
|
171
|
-
" હાલનો જ સંદેશ વિષયની";
|
172
|
-
|
173
|
-
const char *const NNetLangIdTestData::kTestStrHA =
|
174
|
-
" a cikin a kan sakamako daga sakwannin a kan sakamako daga sakwannin daga "
|
175
|
-
"ranar zuwa a kan sakamako daga guda daga ranar zuwa a kan sakamako daga "
|
176
|
-
"shafukan daga ranar zuwa a kan sakamako daga guda a cikin last hour a kan "
|
177
|
-
"sakamako daga guda daga kafar";
|
178
|
-
|
179
|
-
const char *const NNetLangIdTestData::kTestStrHI =
|
180
|
-
" ं ऐडवर्ड्स विज्ञापनों के अनुभव पर आधारित हैं और इनकी मदद से आपको अपने"
|
181
|
-
" विज्ञापनों का अधिकतम लाभ";
|
182
|
-
|
183
|
-
const char *const NNetLangIdTestData::kTestStrHMN =
|
184
|
-
"Qhov no yog ib tug luv luv daim ntawv nyeem uas yuav siv tau rau kev soj "
|
185
|
-
"ntsuam qhov tseeb ntawm tus tshiab mus kom ze.";
|
186
|
-
|
187
|
-
const char *const NNetLangIdTestData::kTestStrHR =
|
188
|
-
"Posljednja dva vladara su Kijaksar (Κυαξαρης; 625-585 prije Krista), "
|
189
|
-
"fraortov sin koji će proširiti teritorij Medije i Astijag. Kijaksar je "
|
190
|
-
"imao kćer ili unuku koja se zvala Amitis a postala je ženom "
|
191
|
-
"Nabukodonosora II. kojoj je ovaj izgradio Viseće vrtove Babilona. "
|
192
|
-
"Kijaksar je modernizirao svoju vojsku i uništio Ninivu 612. prije Krista. "
|
193
|
-
"Naslijedio ga je njegov sin, posljednji medijski kralj, Astijag, kojega "
|
194
|
-
"je detronizirao (srušio sa vlasti) njegov unuk Kir Veliki. Zemljom su "
|
195
|
-
"zavladali Perzijanci. Hrvatska je zemlja situacija u Europi. Ona ima "
|
196
|
-
"bogatu kulturu i ukusna jela.";
|
197
|
-
|
198
|
-
const char *const NNetLangIdTestData::kTestStrHT =
|
199
|
-
" ak pitit tout sosyete a chita se pou sa leta dwe pwoteje yo nimewo leta "
|
200
|
-
"fèt pou li pwoteje tout paran ak pitit nan peyi a menm jan kit paran yo "
|
201
|
-
"marye kit yo pa marye tout manman ki fè pitit leta fèt pou ba yo konkoul "
|
202
|
-
"menm jan tou pou timoun piti ak pou";
|
203
|
-
|
204
|
-
const char *const NNetLangIdTestData::kTestStrHU =
|
205
|
-
" a felhasználóim a google azonosító szöveget ikor látják a felhasználóim "
|
206
|
-
"a google azonosító szöveget felhasználók a google azonosító szöveget "
|
207
|
-
"fogják látni minden tranzakció után ha a vásárlását regisztrációját "
|
208
|
-
"oldalunk";
|
209
|
-
|
210
|
-
const char *const NNetLangIdTestData::kTestStrHY =
|
211
|
-
" ա յ եվ նա հիացած աչքերով նայում է հինգհարկանի շենքի տարօրինակ փոքրիկ "
|
212
|
-
"քառակուսի պատուհաններին դեռ մենք շատ ենք հետամնաց ասում է նա այսպես է";
|
213
|
-
|
214
|
-
const char *const NNetLangIdTestData::kTestStrID =
|
215
|
-
"berdiri setelah pengurusnya yang berusia 83 tahun, Fayzrahman Satarov, "
|
216
|
-
"mendeklarasikan diri sebagai nabi dan rumahnya sebagai negara Islam "
|
217
|
-
"Satarov digambarkan sebagai mantan ulama Islam tahun 1970-an. "
|
218
|
-
"Pengikutnya didorong membaca manuskripnya dan kebanyakan dilarang "
|
219
|
-
"meninggalkan tempat persembunyian bawah tanah di dasar gedung delapan "
|
220
|
-
"lantai mereka. Jaksa membuka penyelidikan kasus kriminal pada kelompok "
|
221
|
-
"itu dan menyatakan akan membubarkan kelompok kalau tetap melakukan "
|
222
|
-
"kegiatan ilegal seperti mencegah anggotanya mencari bantuan medis atau "
|
223
|
-
"pendidikan. Sampai sekarang pihak berwajib belum melakukan penangkapan "
|
224
|
-
"meskipun polisi mencurigai adanya tindak kekerasan pada anak. Pengadilan "
|
225
|
-
"selanjutnya akan memutuskan apakah anak-anak diizinkan tetap tinggal "
|
226
|
-
"dengan orang tua mereka. Kazan yang berada sekitar 800 kilometer di timur "
|
227
|
-
"Moskow merupakan wilayah Tatarstan yang";
|
228
|
-
|
229
|
-
const char *const NNetLangIdTestData::kTestStrIG =
|
230
|
-
"Chineke bụ aha ọzọ ndï omenala Igbo kpọro Chukwu. Mgbe ndị bekee bịara, "
|
231
|
-
"ha mee ya nke ndi Christian. N'echiche ndi ekpere chi Omenala Ndi Igbo, "
|
232
|
-
"Christianity, Judaism, ma Islam, Chineke nwere ọtụtụ utu aha, ma nwee "
|
233
|
-
"nanị otu aha. Ụzọ abụọ e si akpọ aha ahụ bụ Jehovah ma Ọ bụ Yahweh. Na "
|
234
|
-
"ọtụtụ Akwụkwọ Nsọ, e wepụla aha Chineke ma jiri utu aha bụ Onyenwe Anyị "
|
235
|
-
"ma ọ bụ Chineke dochie ya. Ma mgbe e dere akwụkwọ nsọ, aha ahụ bụ Jehova "
|
236
|
-
"pụtara n’ime ya, ihe dị ka ugboro pụkụ asaa(7,000).";
|
237
|
-
|
238
|
-
const char *const NNetLangIdTestData::kTestStrIS =
|
239
|
-
" a afköst leitarorða þinna leitarorð neikvæð leitarorð auglýsingahópa "
|
240
|
-
"byggja upp aðallista yfir ný leitarorð fyrir auglýsingahópana og skoða "
|
241
|
-
"ítarleg gögn um árangur leitarorða eins og samkeppni auglýsenda og "
|
242
|
-
"leitarmagn er krafist notkun";
|
243
|
-
|
244
|
-
const char *const NNetLangIdTestData::kTestStrIT =
|
245
|
-
" a causa di un intervento di manutenzione del sistema fino alle ore circa "
|
246
|
-
"ora legale costa del pacifico del novembre le campagne esistenti "
|
247
|
-
"continueranno a essere pubblicate come di consueto anche durante questo "
|
248
|
-
"breve periodo di inattività ci scusiamo per";
|
249
|
-
|
250
|
-
const char *const NNetLangIdTestData::kTestStrIW =
|
251
|
-
" או לערוך את העדפות ההפצה אנא עקוב אחרי השלבים הבאים כנס לחשבון האישי שלך "
|
252
|
-
"ב";
|
253
|
-
|
254
|
-
const char *const NNetLangIdTestData::kTestStrJA =
|
255
|
-
" このペ ジでは アカウントに指定された予算の履歴を一覧にしています "
|
256
|
-
"それぞれの項目には 予算額と特定期間のステ タスが表示されます "
|
257
|
-
"現在または今後の予算を設定するには";
|
258
|
-
|
259
|
-
const char *const NNetLangIdTestData::kTestStrJV =
|
260
|
-
"Iki Piece cendhak teks sing bakal digunakake kanggo Testing akurasi "
|
261
|
-
"pendekatan anyar.";
|
262
|
-
|
263
|
-
const char *const NNetLangIdTestData::kTestStrKA =
|
264
|
-
" ა ბირთვიდან მიღებული ელემენტი მენდელეევის პერიოდულ სიტემაში "
|
265
|
-
"გადაინაცვლებს ორი უჯრით";
|
266
|
-
|
267
|
-
const char *const NNetLangIdTestData::kTestStrKK =
|
268
|
-
" а билердің өзіне рұқсат берілмеген егер халық талап етсе ғана хан "
|
269
|
-
"келісім берген өздеріңіз білесіздер қр қыл мыс тық кодексінде жазаның";
|
270
|
-
|
271
|
-
const char *const NNetLangIdTestData::kTestStrKM =
|
272
|
-
"នេះគឺជាបំណែកខ្លីនៃអត្ថបទដែលនឹងត្រូវបានប្រើសម្រាប់ការធ្វើតេស្តភាពត្រឹមត្រូវ"
|
273
|
-
"នៃវិធីសាស្រ្តថ្មីនេះ។";
|
274
|
-
|
275
|
-
const char *const NNetLangIdTestData::kTestStrKN =
|
276
|
-
" ಂಠಯ್ಯನವರು ತುಮಕೂರು ಜಿಲ್ಲೆಯ ಚಿಕ್ಕನಾಯಕನಹಳ್ಳಿ ತಾಲ್ಲೂಕಿನ ತೀರ್ಥಪುರ ವೆಂಬ ಸಾಧಾರಣ"
|
277
|
-
" ಹಳ್ಳಿಯ ಶ್ಯಾನುಭೋಗರ";
|
278
|
-
|
279
|
-
const char *const NNetLangIdTestData::kTestStrKO =
|
280
|
-
" 개별적으로 리포트 액세스 권한을 부여할 수 있습니다 액세스 권한 "
|
281
|
-
"부여사용자에게 프로필 리포트에 액세스할 수 있는 권한을 부여하시려면 가용 "
|
282
|
-
"프로필 상자에서 프로필 이름을 선택한 다음";
|
283
|
-
|
284
|
-
const char *const NNetLangIdTestData::kTestStrLA =
|
285
|
-
" a deo qui enim nocendi causa mentiri solet si iam consulendi causa "
|
286
|
-
"mentiatur multum profecit sed aliud est quod per se ipsum laudabile "
|
287
|
-
"proponitur aliud quod in deterioris comparatione praeponitur aliter enim "
|
288
|
-
"gratulamur cum sanus est homo aliter cum melius";
|
289
|
-
|
290
|
-
const char *const NNetLangIdTestData::kTestStrLO =
|
291
|
-
" ກຫາທົ່ວທັງເວັບ ແລະໃນເວັບໄຮ້ສາຍ ທຳອິດໃຫ້ທຳການຊອກຫາກ່ອນ ຈາກນັ້ນ"
|
292
|
-
" ໃຫ້ກົດປຸ່ມເມນູ ໃນໜ້າຜົນໄດ້";
|
293
|
-
|
294
|
-
const char *const NNetLangIdTestData::kTestStrLT =
|
295
|
-
" a išsijungia mano idėja dėl geriausio laiko po pastarųjų savo santykių "
|
296
|
-
"pasimokiau penki dalykai be kurių negaliu gyventi mano miegamajame tu "
|
297
|
-
"surasi ideali pora išsilavinimas aukštoji mokykla koledžas universitetas "
|
298
|
-
"pagrindinis laipsnis metai";
|
299
|
-
|
300
|
-
const char *const NNetLangIdTestData::kTestStrLV =
|
301
|
-
" a gadskārtējā izpārdošana slēpošana jāņi atlaide izmaiņas trafikā kas "
|
302
|
-
"saistītas ar sezonas izpārdošanu speciālajām atlaidēm u c ir parastas un "
|
303
|
-
"atslēgvārdi kas ir populāri noteiktos laika posmos šajā laikā saņems "
|
304
|
-
"lielāku klikšķu";
|
305
|
-
|
306
|
-
const char *const NNetLangIdTestData::kTestStrMG =
|
307
|
-
" amporisihin i ianao mba hijery ny dika teksta ranofotsiny an ity "
|
308
|
-
"lahatsoratra ity tsy ilaina ny opérateur efa karohina daholo ny teny "
|
309
|
-
"rehetra nosoratanao ampiasao anaovana dokambarotra i google telugu datin "
|
310
|
-
"ny takelaka fikarohana sary renitakelak i";
|
311
|
-
|
312
|
-
const char *const NNetLangIdTestData::kTestStrMI =
|
313
|
-
" haere ki te kainga o o haere ki te kainga o o haere ki te kainga o te "
|
314
|
-
"rapunga ahua o haere ki te kainga o ka tangohia he ki to rapunga kaore au "
|
315
|
-
"mohio te tikanga whakatiki o te ra he whakaharuru te pai rapunga a te "
|
316
|
-
"rapunga ahua a e kainga o nga awhina o te";
|
317
|
-
|
318
|
-
const char *const NNetLangIdTestData::kTestStrMK =
|
319
|
-
" гласовите коалицијата на вмро дпмне како партија со најмногу освоени "
|
320
|
-
"гласови ќе добие евра а на сметката на коализијата за македонија";
|
321
|
-
|
322
|
-
const char *const NNetLangIdTestData::kTestStrML =
|
323
|
-
" ം അങ്ങനെ ഞങ്ങള് അവരുടെ മുമ്പില് നിന്നു ഔടും ഉടനെ നിങ്ങള് പതിയിരിപ്പില് "
|
324
|
-
"നിന്നു എഴുന്നേറ്റു";
|
325
|
-
|
326
|
-
const char *const NNetLangIdTestData::kTestStrMN =
|
327
|
-
" а боловсронгуй болгох орон нутгийн ажил үйлсийг уялдуулж зохицуулах "
|
328
|
-
"дүрэм журам боловсруулах орон нутгийн өмч хөрөнгө санхүүгийн";
|
329
|
-
|
330
|
-
const char *const NNetLangIdTestData::kTestStrMR =
|
331
|
-
"हैदराबाद उच्चार ऐका (सहाय्य·माहिती)तेलुगू: హైదరాబాదు , उर्दू:"
|
332
|
-
" حیدر آباد हे भारतातील आंध्र प्रदेश राज्याच्या राजधानीचे शहर"
|
333
|
-
" आहे. हैदराबादची लोकसंख्या ७७ लाख ४० हजार ३३४ आहे. मोत्यांचे शहर"
|
334
|
-
" अशी एकेकाळी ओळख असलेल्या या शहराला ऐतिहासिक, सांस्कृतिक आणि "
|
335
|
-
"स्थापत्यशास्त्रीय वारसा लाभला आहे. १९९० नंतर शिक्षण आणि माहिती तंत्रज्ञान"
|
336
|
-
" त्याचप्रमाणे औषधनिर्मिती आणि जैवतंत्रज्ञान क्षेत्रातील उद्योगधंद्यांची"
|
337
|
-
" वाढ शहरात झाली. दक्षिण मध्य भारतातील पर्यटन आणि तेलुगू चित्रपटनिर्मितीचे"
|
338
|
-
" हैदराबाद हे केंद्र आहे";
|
339
|
-
|
340
|
-
const char *const NNetLangIdTestData::kTestStrMS =
|
341
|
-
"pengampunan beramai-ramai supaya mereka pulang ke rumah masing-masing. "
|
342
|
-
"Orang-orang besarnya enggan mengiktiraf sultan yang dilantik oleh Belanda "
|
343
|
-
"sebagai Yang DiPertuan Selangor. Orang ramai pula tidak mahu menjalankan "
|
344
|
-
"perniagaan bijih timah dengan Belanda, selagi raja yang berhak tidak "
|
345
|
-
"ditabalkan. Perdagang yang lain dibekukan terus kerana untuk membalas "
|
346
|
-
"jasa beliau yang membantu Belanda menentang Riau, Johor dan Selangor. Di "
|
347
|
-
"antara tiga orang Sultan juga dipandang oleh rakyat sebagai seorang "
|
348
|
-
"sultan yang paling gigih. 1 | 2 SULTAN Sebagai ganti Sultan Ibrahim "
|
349
|
-
"ditabalkan Raja Muhammad iaitu Raja Muda. Walaupun baginda bukan anak "
|
350
|
-
"isteri pertama bergelar Sultan Muhammad bersemayam di Kuala Selangor "
|
351
|
-
"juga. Pentadbiran baginda yang lemah itu menyebabkan Kuala Selangor "
|
352
|
-
"menjadi sarang ioleh Cina di Lukut tidak diambil tindakan, sedangkan "
|
353
|
-
"baginda sendiri banyak berhutang kepada 1";
|
354
|
-
|
355
|
-
const char *const NNetLangIdTestData::kTestStrMT =
|
356
|
-
" ata ikteb messaġġ lil indirizzi differenti billi tagħżilhom u tagħfas il "
|
357
|
-
"buttuna ikteb żid numri tfittxijja tal kotba mur print home kotba minn "
|
358
|
-
"pagni ghal pagna minn ghall ktieb ta aċċessa stieden habib iehor grazzi "
|
359
|
-
"it tim tal gruppi google";
|
360
|
-
|
361
|
-
const char *const NNetLangIdTestData::kTestStrMY =
|
362
|
-
" တက္ကသုိလ္ မ္ဟ ပ္ရန္ လာ္ရပီးေနာက္ န္ဟစ္ အရ္ဝယ္ ဦးသန္ ့သည္ ပန္"
|
363
|
-
" းတနော္ အမ္ယုိးသား ေက္ယာင္ း";
|
364
|
-
|
365
|
-
const char *const NNetLangIdTestData::kTestStrNE =
|
366
|
-
"अरू ठाऊँबाटपनि खुलेको छ यो खाता अर अरू ठाऊँबाटपनि खुलेको छ यो खाता अर ू";
|
367
|
-
|
368
|
-
const char *const NNetLangIdTestData::kTestStrNL =
|
369
|
-
" a als volgt te werk om een configuratiebestand te maken sitemap gen py "
|
370
|
-
"ebruik filters om de s op te geven die moeten worden toegevoegd of "
|
371
|
-
"uitgesloten op basis van de opmaaktaal elke sitemap mag alleen de s "
|
372
|
-
"bevatten voor een bepaalde opmaaktaal dit";
|
373
|
-
|
374
|
-
const char *const NNetLangIdTestData::kTestStrNO =
|
375
|
-
" a er obligatorisk tidsforskyvning plassering av katalogsøk "
|
376
|
-
"planinformasjon loggfilbane gruppenavn kontoinformasjon passord domene "
|
377
|
-
"gruppeinformasjon alle kampanjesporing alternativ bruker grupper "
|
378
|
-
"oppgaveplanlegger oppgavehistorikk kontosammendrag antall";
|
379
|
-
|
380
|
-
const char *const NNetLangIdTestData::kTestStrNY =
|
381
|
-
"Boma ndi gawo la dziko lomwe linapangidwa ndi cholinga chothandiza "
|
382
|
-
"ntchito yolamulira. Kuŵalako kulikuunikabe mandita, Edipo nyima "
|
383
|
-
"unalephera kugonjetsa kuŵalako.";
|
384
|
-
|
385
|
-
const char *const NNetLangIdTestData::kTestStrPA =
|
386
|
-
" ਂ ਦਿਨਾਂ ਵਿਚ ਭਾਈ ਸਾਹਿਬ ਦੀ ਬੁੱਚੜ ਗੋਬਿੰਦ ਰਾਮ ਨਾਲ ਅੜਫਸ ਚੱਲ ਰਹੀ ਸੀ ਗੋਬਿੰਦ"
|
387
|
-
" ਰਾਮ ਨੇ ਭਾਈ ਸਾਹਿਬ ਦੀਆਂ ਭੈਣਾ";
|
388
|
-
|
389
|
-
const char *const NNetLangIdTestData::kTestStrPL =
|
390
|
-
" a australii będzie widział inne reklamy niż użytkownik z kanady "
|
391
|
-
"kierowanie geograficzne sprawia że reklamy są lepiej dopasowane do "
|
392
|
-
"użytkownika twojej strony oznacza to także że możesz nie zobaczyć "
|
393
|
-
"wszystkich reklam które są wyświetlane na";
|
394
|
-
|
395
|
-
const char *const NNetLangIdTestData::kTestStrPT =
|
396
|
-
" a abit prevê que a entrada desses produtos estrangeiros no mercado "
|
397
|
-
"têxtil e vestuário do brasil possa reduzir os preços em cerca de a partir "
|
398
|
-
"de má notícia para os empresários que terão que lutar para garantir suas "
|
399
|
-
"margens de lucro mas boa notícia";
|
400
|
-
|
401
|
-
const char *const NNetLangIdTestData::kTestStrRO =
|
402
|
-
" a anunţurilor reţineţi nu plătiţi pentru clicuri sau impresii ci numai "
|
403
|
-
"atunci când pe site ul dvs survine o acţiune dorită site urile negative "
|
404
|
-
"nu pot avea uri de destinaţie daţi instrucţiuni societăţii dvs bancare "
|
405
|
-
"sau constructoare să";
|
406
|
-
|
407
|
-
const char *const NNetLangIdTestData::kTestStrRU =
|
408
|
-
" а неправильный формат идентификатора дн назад";
|
409
|
-
|
410
|
-
const char *const NNetLangIdTestData::kTestStrSI =
|
411
|
-
" අනුරාධ මිහිඳුකුල නමින් සකුරා ට ලිපියක් තැපෑලෙන් එවා තිබුණා කි "
|
412
|
-
"් රස්ටි ෂෙල්ටන් ප ් රනාන්දු ද";
|
413
|
-
|
414
|
-
const char *const NNetLangIdTestData::kTestStrSK =
|
415
|
-
" a aktivovať reklamnú kampaň ak chcete kampaň pred spustením ešte "
|
416
|
-
"prispôsobiť uložte ju ako šablónu a pokračujte v úprave vyberte si jednu "
|
417
|
-
"z možností nižšie a kliknite na tlačidlo uložiť kampaň nastavenia kampane "
|
418
|
-
"môžete ľubovoľne";
|
419
|
-
|
420
|
-
const char *const NNetLangIdTestData::kTestStrSL =
|
421
|
-
" adsense stanje prijave za google adsense google adsense račun je bil "
|
422
|
-
"začasno zamrznjen pozdravljeni hvala za vaše zanimanje v google adsense "
|
423
|
-
"po pregledu vaše prijavnice so naši strokovnjaki ugotovili da spletna "
|
424
|
-
"stran ki je trenutno povezana z vašim";
|
425
|
-
|
426
|
-
const char *const NNetLangIdTestData::kTestStrSO =
|
427
|
-
" a oo maanta bogga koobaad ugu qoran yahey beesha caalamka laakiin si "
|
428
|
-
"kata oo beesha caalamku ula guntato soomaaliya waxa aan shaki ku jirin in "
|
429
|
-
"aakhirataanka dadka soomaalida oo kaliya ay yihiin ku soomaaliya ka saari "
|
430
|
-
"kara dhibka ay ku jirto";
|
431
|
-
|
432
|
-
const char *const NNetLangIdTestData::kTestStrSQ =
|
433
|
-
" a do të kërkoni nga beogradi që të njohë pavarësinë e kosovës zoti thaçi "
|
434
|
-
"prishtina është gati ta njoh pavarësinë e serbisë ndërsa natyrisht se do "
|
435
|
-
"të kërkohet një gjë e tillë që edhe beogradi ta njoh shtetin e pavarur "
|
436
|
-
"dhe sovran të";
|
437
|
-
|
438
|
-
const char *const NNetLangIdTestData::kTestStrSR =
|
439
|
-
"балчак балчак на мапи србије уреди демографија у насељу балчак живи "
|
440
|
-
"пунолетна становника а просечна старост становништва износи година";
|
441
|
-
|
442
|
-
const char *const NNetLangIdTestData::kTestStrST =
|
443
|
-
" bang ba nang le thahasello matshwao a sehlooho thuto e thehilweng hodima "
|
444
|
-
"diphetho ke tsela ya ho ruta le ho ithuta e totobatsang hantle seo "
|
445
|
-
"baithuti ba lokelang ho se fihlella ntlhatheo eo e sebetsang ka yona ke "
|
446
|
-
"ya hore titjhere o hlakisa pele seo";
|
447
|
-
|
448
|
-
const char *const NNetLangIdTestData::kTestStrSU =
|
449
|
-
"Nu ngatur kahirupan warga, keur kapentingan pamarentahan diatur ku RT, RW "
|
450
|
-
"jeung Kepala Dusun, sedengkeun urusan adat dipupuhuan ku Kuncen jeung "
|
451
|
-
"kepala adat. Sanajan Kampung Kuta teu pati anggang jeung lembur sejenna "
|
452
|
-
"nu aya di wewengkon Desa Pasir Angin, tapi boh wangunan imah atawa "
|
453
|
-
"tradisi kahirupan masarakatna nenggang ti nu lian.";
|
454
|
-
|
455
|
-
const char *const NNetLangIdTestData::kTestStrSV =
|
456
|
-
" a bort objekt från google desktop post äldst meny öretag dress etaljer "
|
457
|
-
"alternativ för vad är inne yaste google skrivbord plugin program för "
|
458
|
-
"nyheter google visa nyheter som är anpassade efter de artiklar som du "
|
459
|
-
"läser om du till exempel läser";
|
460
|
-
|
461
|
-
const char *const NNetLangIdTestData::kTestStrSW =
|
462
|
-
" a ujumbe mpya jumla unda tafuta na angalia vikundi vya kujadiliana na "
|
463
|
-
"kushiriki mawazo iliyopangwa kwa tarehe watumiaji wapya futa orodha hizi "
|
464
|
-
"lugha hoja vishikanisho vilivyo dhaminiwa ujumbe sanaa na tamasha toka "
|
465
|
-
"udhibitisho wa neno kwa haraka fikia";
|
466
|
-
|
467
|
-
const char *const NNetLangIdTestData::kTestStrTA =
|
468
|
-
" அங்கு ராஜேந்திர சோழனால் கட்டப்பட்ட பிரம்மாண்டமான சிவன் கோவில் ஒன்றும்"
|
469
|
-
" உள்ளது தொகு";
|
470
|
-
|
471
|
-
const char *const NNetLangIdTestData::kTestStrTE =
|
472
|
-
" ఁ దనర జయించిన తత్వ మరసి చూడఁ దాన యగును రాజయోగి యిట్లు తేజరిల్లుచు నుండు "
|
473
|
-
"విశ్వదాభిరామ వినర వేమ";
|
474
|
-
|
475
|
-
const char *const NNetLangIdTestData::kTestStrTG =
|
476
|
-
" адолат ва инсондӯстиро бар фашизм нажодпарастӣ ва адоват тарҷеҳ додааст "
|
477
|
-
"чоп кунед ба дигарон фиристед чоп кунед ба дигарон фиристед";
|
478
|
-
|
479
|
-
const char *const NNetLangIdTestData::kTestStrTH =
|
480
|
-
" กฏในการค้นหา หรือหน้าเนื้อหา หากท่านเลือกลงโฆษณา "
|
481
|
-
"ท่านอาจจะปรับต้องเพิ่มงบประมาณรายวันตา";
|
482
|
-
|
483
|
-
const char *const NNetLangIdTestData::kTestStrTR =
|
484
|
-
" a ayarlarınızı görmeniz ve yönetmeniz içindir eğer kampanyanız için "
|
485
|
-
"günlük bütçenizi gözden geçirebileceğiniz yeri arıyorsanız kampanya "
|
486
|
-
"yönetimi ne gidin kampanyanızı seçin ve kampanya ayarlarını düzenle yi "
|
487
|
-
"tıklayın sunumu";
|
488
|
-
|
489
|
-
const char *const NNetLangIdTestData::kTestStrUK =
|
490
|
-
" а більший бюджет щоб забезпечити собі максимум прибутків від переходів "
|
491
|
-
"відстежуйте свої об яви за датою географічним розташуванням";
|
492
|
-
|
493
|
-
const char *const NNetLangIdTestData::kTestStrUR =
|
494
|
-
" آپ کو کم سے کم ممکنہ رقم چارج کرتا ہے اس کی مثال کے طور پر فرض کریں اگر "
|
495
|
-
"آپ کی زیادہ سے زیادہ قیمت فی کلِک امریکی ڈالر اور کلِک کرنے کی شرح ہو تو";
|
496
|
-
|
497
|
-
const char *const NNetLangIdTestData::kTestStrUZ =
|
498
|
-
" abadiylashtirildi aqsh ayol prezidentga tayyormi markaziy osiyo afg "
|
499
|
-
"onistonga qanday yordam berishi mumkin ukrainada o zbekistonlik "
|
500
|
-
"muhojirlar tazyiqdan shikoyat qilmoqda gruziya va ukraina hozircha natoga "
|
501
|
-
"qabul qilinmaydi afg oniston o zbekistonni g";
|
502
|
-
|
503
|
-
const char *const NNetLangIdTestData::kTestStrVI =
|
504
|
-
" adsense cho nội dung nhà cung cấp dịch vụ di động xác minh tín"
|
505
|
-
" dụng thay đổi nhãn kg các ô xem chi phí cho từ chối các đơn đặt"
|
506
|
-
" hàng dạng cấp dữ liệu ác minh trang web của bạn để xem";
|
507
|
-
|
508
|
-
const char *const NNetLangIdTestData::kTestStrYI =
|
509
|
-
"אן פאנטאזיע ער איז באקאנט צים מערסטן פאר זיינע באַלאַדעס ער האָט געוווינט "
|
510
|
-
"אין ווארשע יעס פאריס ליווערפול און לאנדאן סוף כל סוף איז ער";
|
511
|
-
|
512
|
-
const char *const NNetLangIdTestData::kTestStrYO =
|
513
|
-
" abinibi han ikawe alantakun le ni opolopo ede abinibi ti a to lesese bi "
|
514
|
-
"eniyan to fe lo se fe lati se atunse jowo mo pe awon oju iwe itakunagbaye "
|
515
|
-
"miran ti ako ni oniruru ede abinibi le faragba nipa atunse ninu se iwadi "
|
516
|
-
"blogs ni ori itakun agbaye ti e ba";
|
517
|
-
|
518
|
-
const char *const NNetLangIdTestData::kTestStrZH =
|
519
|
-
"产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
|
520
|
-
"对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
|
521
|
-
"要更改您的国家 地区 请在此表的最上端更改您的";
|
522
|
-
|
523
|
-
const char *const NNetLangIdTestData::kTestStrZU =
|
524
|
-
" ana engu uma inkinga iqhubeka siza ubike kwi isexwayiso ngenxa yephutha "
|
525
|
-
"lomlekeleli sikwazi ukubuyisela emuva kuphela imiphumela engaqediwe "
|
526
|
-
"ukuthola imiphumela eqediwe zama ukulayisha kabusha leli khasi emizuzwini "
|
527
|
-
"engu uma inkinga iqhubeka siza uthumele";
|
528
|
-
|
529
|
-
} // namespace chrome_lang_id
|