language_detection 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +85 -0
- data/Rakefile +11 -0
- data/ext/cld/Makefile +34 -0
- data/ext/cld/base/basictypes.h +348 -0
- data/ext/cld/base/build_config.h +124 -0
- data/ext/cld/base/casts.h +156 -0
- data/ext/cld/base/commandlineflags.h +443 -0
- data/ext/cld/base/crash.h +41 -0
- data/ext/cld/base/dynamic_annotations.h +358 -0
- data/ext/cld/base/global_strip_options.h +59 -0
- data/ext/cld/base/log_severity.h +46 -0
- data/ext/cld/base/logging.h +1403 -0
- data/ext/cld/base/macros.h +243 -0
- data/ext/cld/base/port.h +54 -0
- data/ext/cld/base/scoped_ptr.h +428 -0
- data/ext/cld/base/stl_decl.h +0 -0
- data/ext/cld/base/stl_decl_msvc.h +107 -0
- data/ext/cld/base/string_util.h +29 -0
- data/ext/cld/base/strtoint.h +93 -0
- data/ext/cld/base/template_util.h +96 -0
- data/ext/cld/base/type_traits.h +198 -0
- data/ext/cld/base/vlog_is_on.h +143 -0
- data/ext/cld/cld.so +0 -0
- data/ext/cld/encodings/compact_lang_det/cldutil.cc +905 -0
- data/ext/cld/encodings/compact_lang_det/cldutil.h +1205 -0
- data/ext/cld/encodings/compact_lang_det/cldutil_dbg.h +76 -0
- data/ext/cld/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det.cc +62 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det.h +145 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
- data/ext/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
- data/ext/cld/encodings/compact_lang_det/compile.cmd +1 -0
- data/ext/cld/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
- data/ext/cld/encodings/compact_lang_det/ext_lang_enc.h +119 -0
- data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
- data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
- data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
- data/ext/cld/encodings/compact_lang_det/getonescriptspan.cc +570 -0
- data/ext/cld/encodings/compact_lang_det/getonescriptspan.h +131 -0
- data/ext/cld/encodings/compact_lang_det/letterscript_enum.cc +117 -0
- data/ext/cld/encodings/compact_lang_det/letterscript_enum.h +99 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence.cc +259 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence.h +44 -0
- data/ext/cld/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
- data/ext/cld/encodings/compact_lang_det/tote.cc +299 -0
- data/ext/cld/encodings/compact_lang_det/tote.h +89 -0
- data/ext/cld/encodings/compact_lang_det/unittest_data.h +193 -0
- data/ext/cld/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
- data/ext/cld/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
- data/ext/cld/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_google.h +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_logging.h +21 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_macros.h +19 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib.h +15 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf.h +24 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
- data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
- data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
- data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
- data/ext/cld/encodings/internal/encodings.cc +12 -0
- data/ext/cld/encodings/lang_enc.h +254 -0
- data/ext/cld/encodings/proto/encodings.pb.h +169 -0
- data/ext/cld/encodings/public/encodings.h +301 -0
- data/ext/cld/extconf.rb +1 -0
- data/ext/cld/language_detection.cc +88 -0
- data/ext/cld/languages/internal/languages.cc +337 -0
- data/ext/cld/languages/proto/languages.pb.h +179 -0
- data/ext/cld/languages/public/languages.h +379 -0
- data/language_detection.gemspec +28 -0
- data/lib/language_detection/string.rb +1 -0
- data/lib/language_detection/version.rb +3 -0
- data/lib/language_detection.rb +54 -0
- data/test/_helper.rb +15 -0
- data/test/fixtures/languages.csv +80 -0
- data/test/language_detection_test.rb +88 -0
- metadata +250 -0
@@ -0,0 +1,379 @@
|
|
1
|
+
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file.
|
4
|
+
|
5
|
+
#ifndef LANGUAGES_PUBLIC_LANGUAGES_H_
|
6
|
+
#define LANGUAGES_PUBLIC_LANGUAGES_H_
|
7
|
+
|
8
|
+
// This interface defines the Language enum and functions that depend
|
9
|
+
// only on Language values.
|
10
|
+
|
11
|
+
// A hash-function for Language, hash<Language>, is defined in
|
12
|
+
// i18n/languages/public/languages-hash.h
|
13
|
+
|
14
|
+
#ifndef SWIG
|
15
|
+
// Language enum defined in languages.proto
|
16
|
+
// Also description on how to add languages.
|
17
|
+
#include "languages/proto/languages.pb.h"
|
18
|
+
|
19
|
+
// We need this for compatibility:
|
20
|
+
// - The Language enum in the default namespace.
|
21
|
+
// COMMENTED OUT TO REDUCE DEPENDENCIES ON GOOGLE3 CODE
|
22
|
+
//using namespace i18n::languages;
|
23
|
+
|
24
|
+
#else
|
25
|
+
// And we must have a swig-compatible enum.
|
26
|
+
// This one is a simple cleaned up version of language.proto, making the enum
|
27
|
+
// compatible with C++.
|
28
|
+
#include "i18n/languages/internal/languages_proto_wrapper.h"
|
29
|
+
|
30
|
+
#endif
|
31
|
+
|
32
|
+
// COMMENTED OUT TO REDUCE DEPENDENCIES ON GOOGLE3 CODE
|
33
|
+
//#include "util/utf8/proptables/script_enum.h"
|
34
|
+
|
35
|
+
const int kNumLanguages = NUM_LANGUAGES;
|
36
|
+
|
37
|
+
// Return the default language (ENGLISH).
|
38
|
+
Language default_language();
|
39
|
+
|
40
|
+
|
41
|
+
// *******************************************
|
42
|
+
// Language predicates
|
43
|
+
// IsValidLanguage()
|
44
|
+
// IS_LANGUAGE_UNKNOWN()
|
45
|
+
// IsCJKLanguage()
|
46
|
+
// IsChineseLanguage()
|
47
|
+
// IsNorwegianLanguage()
|
48
|
+
// IsPortugueseLanguage()
|
49
|
+
// IsRightToLeftLanguage()
|
50
|
+
// IsMaybeRightToLeftLanguage()
|
51
|
+
// IsSameLanguage()
|
52
|
+
// IsScriptRequiringLongerSnippets()
|
53
|
+
// *******************************************
|
54
|
+
|
55
|
+
// IsValidLanguage
|
56
|
+
// ===============
|
57
|
+
//
|
58
|
+
// Function to check if the input is within range of the Language enum. If
|
59
|
+
// IsValidLanguage(lang) returns true, it is safe to call
|
60
|
+
// static_cast<Language>(lang).
|
61
|
+
//
|
62
|
+
inline bool IsValidLanguage(int lang) {
|
63
|
+
return ((lang >= 0) && (lang < kNumLanguages));
|
64
|
+
}
|
65
|
+
|
66
|
+
// Return true if the language is "unknown". (This function was
|
67
|
+
// previously a macro, hence the spelling in all caps.)
|
68
|
+
//
|
69
|
+
inline bool IS_LANGUAGE_UNKNOWN(Language lang) {
|
70
|
+
return lang == TG_UNKNOWN_LANGUAGE || lang == UNKNOWN_LANGUAGE;
|
71
|
+
}
|
72
|
+
|
73
|
+
// IsCJKLanguage
|
74
|
+
// -------------
|
75
|
+
//
|
76
|
+
// This function returns true if the language is either Chinese
|
77
|
+
// (simplified or traditional), Japanese, or Korean.
|
78
|
+
bool IsCJKLanguage(Language lang);
|
79
|
+
|
80
|
+
// IsChineseLanguage
|
81
|
+
// -----------------
|
82
|
+
//
|
83
|
+
// This function returns true if the language is either Chinese
|
84
|
+
// (simplified or traditional)
|
85
|
+
bool IsChineseLanguage(Language lang);
|
86
|
+
|
87
|
+
// IsNorwegianLanguage
|
88
|
+
// --------------------
|
89
|
+
//
|
90
|
+
// This function returns true if the language is any of the Norwegian
|
91
|
+
// languages (regular or Nynorsk).
|
92
|
+
bool IsNorwegianLanguage(Language lang);
|
93
|
+
|
94
|
+
// IsPortugueseLanguage
|
95
|
+
// --------------------
|
96
|
+
//
|
97
|
+
// This function returns true if the language is any of the Portuguese
|
98
|
+
// languages (regular, Portugal or Brazil)
|
99
|
+
bool IsPortugueseLanguage(Language lang);
|
100
|
+
|
101
|
+
// IsSameLanguage
|
102
|
+
// --------------
|
103
|
+
//
|
104
|
+
// WARNING: This function provides only a simple test on the values of
|
105
|
+
// the two Language arguments. It returns false if either language is
|
106
|
+
// invalid. It returns true if the language arguments are equal, or
|
107
|
+
// if they are both Chinese languages, both Norwegian languages, or
|
108
|
+
// both Portuguese languages, as defined by IsChineseLanguage,
|
109
|
+
// IsNorwegianLanguage, and IsPortugueseLanguage. Otherwise it returns
|
110
|
+
// false.
|
111
|
+
bool IsSameLanguage(Language lang1, Language lang2);
|
112
|
+
|
113
|
+
|
114
|
+
// IsRightToLeftLanguage
|
115
|
+
// ---------------------
|
116
|
+
//
|
117
|
+
// This function returns true if the language is only written right-to-left
|
118
|
+
// (E.g., Hebrew, Arabic, Persian etc.)
|
119
|
+
//
|
120
|
+
// IMPORTANT NOTE: Technically we're talking about scripts, not languages.
|
121
|
+
// There are languages that can be written in more than one script.
|
122
|
+
// Examples:
|
123
|
+
// - Kurdish and Azeri ('AZERBAIJANI') can be written left-to-right in
|
124
|
+
// Latin or Cyrillic script, and right-to-left in Arabic script.
|
125
|
+
// - Sindhi and Punjabi are written in different scripts, depending on
|
126
|
+
// region and dialect.
|
127
|
+
// - Turkmen used an Arabic script historically, but not any more.
|
128
|
+
// - Pashto and Uyghur can use Arabic script, but use a Roman script
|
129
|
+
// on the Internet.
|
130
|
+
// - Kashmiri and Urdu are written either with Arabic or Devanagari script.
|
131
|
+
//
|
132
|
+
// This function only returns true for languages that are always, unequivocally
|
133
|
+
// written in right-to-left script.
|
134
|
+
//
|
135
|
+
// TODO(benjy): If we want to do anything special with multi-script languages
|
136
|
+
// we should create new 'languages' for each language+script, as we do for
|
137
|
+
// traditional vs. simplified Chinese. However most such languages are rare in
|
138
|
+
// use and even rarer on the web, so this is unlikely to be something we'll
|
139
|
+
// be concerned with for a while.
|
140
|
+
bool IsRightToLeftLanguage(Language lang);
|
141
|
+
|
142
|
+
// IsMaybeRightToLeftLanguage
|
143
|
+
// --------------------------
|
144
|
+
//
|
145
|
+
// This function returns true if the language may appear on the web in a
|
146
|
+
// right-to-left script (E.g., Hebrew, Arabic, Persian, Urdu, Kurdish, etc.)
|
147
|
+
//
|
148
|
+
// NOTE: See important notes under IsRightToLeftLanguage(...).
|
149
|
+
//
|
150
|
+
// This function returns true for languages that *may* appear on the web in a
|
151
|
+
// right-to-left script, even if they may also appear in a left-to-right
|
152
|
+
// script.
|
153
|
+
//
|
154
|
+
// This function should typically be used in cases where doing some work on
|
155
|
+
// left-to-right text would be OK (usually a no-op), and this function is used
|
156
|
+
// just to cut down on unnecessary work on regular, LTR text.
|
157
|
+
bool IsMaybeRightToLeftLanguage(Language lang);
|
158
|
+
|
159
|
+
// IsScriptRequiringLongerSnippets
|
160
|
+
// --------------------
|
161
|
+
//
|
162
|
+
// This function returns true if the script characteristics require longer
|
163
|
+
// snippet length (Devanagari, Bengali, Gurmukhi,
|
164
|
+
// Gujarati, Oriya, Tamil, Telugu, Kannada, Malayalam).
|
165
|
+
// COMMENTED OUT TO REDUCE DEPENDENCIES ON GOOGLE3 CODE
|
166
|
+
// bool IsScriptRequiringLongerSnippets(UnicodeScript script);
|
167
|
+
|
168
|
+
|
169
|
+
// *******************************************
|
170
|
+
// LANGUAGE NAMES
|
171
|
+
//
|
172
|
+
// This interface defines a standard name for each valid Language,
|
173
|
+
// and a standard name for invalid languages. Some language names use all
|
174
|
+
// uppercase letters, but others use mixed case.
|
175
|
+
// LanguageName() [Language to name]
|
176
|
+
// LanguageEnumName() [language to enum name]
|
177
|
+
// LanguageFromName() [name to Language]
|
178
|
+
// default_language_name()
|
179
|
+
// invalid_language_name()
|
180
|
+
// *******************************************
|
181
|
+
|
182
|
+
// Given a Language, returns its standard name.
|
183
|
+
// Return invalid_language_name() if the language is invalid.
|
184
|
+
const char* LanguageName(Language lang);
|
185
|
+
|
186
|
+
// Given a Language, return the name of the enum constant for that
|
187
|
+
// language. In all but a few cases, this is the same as its standard
|
188
|
+
// name. For example, LanguageName(CHINESE) returns "Chinese", but
|
189
|
+
// LanguageEnumName(CHINESE) returns "CHINESE". This is intended for
|
190
|
+
// code that is generating C++ code, where the enum constant is more
|
191
|
+
// useful than its integer value. Return "NUM_LANGUAGES" if
|
192
|
+
// the language is invalid.
|
193
|
+
const char* LanguageEnumName(Language lang);
|
194
|
+
|
195
|
+
// The maximum length of a standard language name.
|
196
|
+
const int kMaxLanguageNameSize = 50;
|
197
|
+
|
198
|
+
// The standard name for the default language.
|
199
|
+
const char* default_language_name();
|
200
|
+
|
201
|
+
// The standard name for all invalid languages.
|
202
|
+
const char* invalid_language_name();
|
203
|
+
|
204
|
+
// If lang_name matches the standard name of a Language, using a
|
205
|
+
// case-insensitive comparison, set *language to that Language and
|
206
|
+
// return true.
|
207
|
+
// Otherwise, set *language to UNKNOWN_LANGUAGE and return false.
|
208
|
+
//
|
209
|
+
// For backwards compatibility, "HATIAN_CREOLE" is allowed as a name
|
210
|
+
// for HAITIAN_CREOLE, and "QUECHAU" is allowed as a name for QUECHUA.
|
211
|
+
// For compatibility with LanguageEnumName, "UNKNOWN_LANGUAGE" is allowed
|
212
|
+
// as a name for UNKNOWN_LANGUAGE (the return value is true in this case,
|
213
|
+
// as it is for "Unknown"), and "CHINESE_T" is allowed as a name for
|
214
|
+
// CHINESE_T (i.e., a synonym for "ChineseT").
|
215
|
+
//
|
216
|
+
// REQUIRES: language must not be NULL.
|
217
|
+
//
|
218
|
+
bool LanguageFromName(const char* lang_name, Language *language);
|
219
|
+
|
220
|
+
|
221
|
+
|
222
|
+
// *******************************************
|
223
|
+
// LANGUAGE CODES
|
224
|
+
//
|
225
|
+
// This interface defines a standard code for each valid language, and
|
226
|
+
// a standard code for invalid languages. These are derived from ISO codes,
|
227
|
+
// with some Google additions.
|
228
|
+
// LanguageCode()
|
229
|
+
// default_language_code()
|
230
|
+
// invalid_language_code()
|
231
|
+
// LanguageCodeWithDialects()
|
232
|
+
// LanguageCodeISO639_1()
|
233
|
+
// LanguageCodeISO639_2()
|
234
|
+
// *******************************************
|
235
|
+
|
236
|
+
// Given a Language, return its standard code. There are Google-specific codes:
|
237
|
+
// For CHINESE_T, return "zh-TW".
|
238
|
+
// For TG_UNKNOWN_LANGUAGE, return "ut".
|
239
|
+
// For UNKNOWN_LANGUAGE, return "un".
|
240
|
+
// For PORTUGUESE_P, return "pt-PT".
|
241
|
+
// For PORTUGUESE_B, return "pt-BR".
|
242
|
+
// For LIMBU, return "sit-NP".
|
243
|
+
// For CHEROKEE, return "chr".
|
244
|
+
// For SYRIAC, return "syr".
|
245
|
+
// Otherwise return the ISO 639-1 two-letter language code for lang.
|
246
|
+
// If lang is invalid, return invalid_language_code().
|
247
|
+
//
|
248
|
+
// NOTE: See the note below about the codes for Chinese languages.
|
249
|
+
//
|
250
|
+
const char* LanguageCode(Language lang);
|
251
|
+
|
252
|
+
// The maximum length of a language code.
|
253
|
+
const int kMaxLanguageCodeSize = 50;
|
254
|
+
|
255
|
+
// The standard code for the default language.
|
256
|
+
const char* default_language_code();
|
257
|
+
|
258
|
+
// The standard code for all invalid languages.
|
259
|
+
const char* invalid_language_code();
|
260
|
+
|
261
|
+
|
262
|
+
// --------------------------------------------
|
263
|
+
// NOTE: CHINESE LANGUAGE CODES
|
264
|
+
//
|
265
|
+
// There are three functions that return codes for Chinese languages.
|
266
|
+
// LanguageCode(lang) and LanguageCodeWithDialects(lang) are defined here.
|
267
|
+
// LanguageCode(lang, encoding) is defined in i18n/encodings/lang_enc.h.
|
268
|
+
// The following list shows the different results.
|
269
|
+
//
|
270
|
+
// LanguageCode(CHINESE) returns "zh"
|
271
|
+
// LanguageCode(CHINESE_T) returns "zh-TW".
|
272
|
+
//
|
273
|
+
// LanguageCodeWithDialects(CHINESE) returns "zh-CN".
|
274
|
+
// LanguageCodeWithDialects(CHINESE_T) returns "zh-TW".
|
275
|
+
//
|
276
|
+
// LanguageCode(CHINESE_T, <any encoding>) returns "zh-TW".
|
277
|
+
// LanguageCode(CHINESE, CHINESE_BIG5) returns "zh-TW".
|
278
|
+
// LanguageCode(CHINESE, <any other encoding>) returns "zh-CN".
|
279
|
+
//
|
280
|
+
// --------------------------------------------
|
281
|
+
|
282
|
+
// LanguageCodeWithDialects
|
283
|
+
// ------------------------
|
284
|
+
//
|
285
|
+
// If lang is CHINESE, return "zh-CN". Otherwise return LanguageCode(lang).
|
286
|
+
const char* LanguageCodeWithDialects(Language lang);
|
287
|
+
|
288
|
+
// LanguageCodeISO639_1
|
289
|
+
// --------------------
|
290
|
+
//
|
291
|
+
// Return the ISO 639-1 two-letter language code for lang.
|
292
|
+
// Return invalid_language_code() if lang is invalid or does not have
|
293
|
+
// an ISO 639-1 two-letter language code.
|
294
|
+
const char* LanguageCodeISO639_1(Language lang);
|
295
|
+
|
296
|
+
// LanguageCodeISO639_2
|
297
|
+
// --------------------
|
298
|
+
//
|
299
|
+
// Return the ISO 639-2 three-letter language code for lang.
|
300
|
+
// Return invalid_language_code() if lang is invalid or does not have
|
301
|
+
// an ISO 639-2 three-letter language code.
|
302
|
+
const char* LanguageCodeISO639_2(Language lang);
|
303
|
+
|
304
|
+
// LanguageFromCode
|
305
|
+
// ----------------
|
306
|
+
//
|
307
|
+
// If lang_code matches the code for a Language, using a case-insensitive
|
308
|
+
// comparison, set *language to that Language and return true.
|
309
|
+
// Otherwise, set *language to UNKNOWN_LANGUAGE and return false.
|
310
|
+
//
|
311
|
+
// lang_code can be an ISO 639-1 (two-letter) code, an ISO 639-2
|
312
|
+
// (three-letter) code, or a Google-specific code (see LanguageCode).
|
313
|
+
//
|
314
|
+
// Certain language-code aliases are also allowed:
|
315
|
+
// For "zh-cn" and "zh_cn", set *language to CHINESE.
|
316
|
+
// For "zh-tw" and "zh_tw", set *language to CHINESE_T.
|
317
|
+
// For "he", set *language to HEBREW.
|
318
|
+
// For "in", set *language to INDONESIAN.
|
319
|
+
// For "ji", set *language to YIDDISH.
|
320
|
+
// For "fil", set *language to TAGALOG.
|
321
|
+
//
|
322
|
+
// REQUIRES: 'language' must not be NULL.
|
323
|
+
bool LanguageFromCode(const char* lang_code, Language *language);
|
324
|
+
|
325
|
+
|
326
|
+
// LanguageFromCodeOrName
|
327
|
+
// ----------------------
|
328
|
+
//
|
329
|
+
// If lang_code_or_name is a language code or a language name,
|
330
|
+
// set *language to the corresponding Language and return true.
|
331
|
+
// Otherwise set *language to UNKNOWN_LANGUAGE and return false.
|
332
|
+
//
|
333
|
+
bool LanguageFromCodeOrName(const char* lang_code_or_name,
|
334
|
+
Language* language);
|
335
|
+
|
336
|
+
// LanguageNameFromCode
|
337
|
+
// --------------------
|
338
|
+
//
|
339
|
+
// If language_code is the code for a Language (see LanguageFromCode),
|
340
|
+
// return the standard name of that language (see LanguageName).
|
341
|
+
// Otherwise return invalid_language_name().
|
342
|
+
//
|
343
|
+
const char* LanguageNameFromCode(const char* language_code);
|
344
|
+
|
345
|
+
|
346
|
+
// Miscellany
|
347
|
+
|
348
|
+
// LanguageCodeToUnderscoreForm
|
349
|
+
// ----------------------------
|
350
|
+
//
|
351
|
+
// Given a language code, convert the dash "-" to underscore "_".
|
352
|
+
//
|
353
|
+
// Specifically, if result_length <= strlen(lang_code), set result[0]
|
354
|
+
// to '\0' and return false. Otherwise, copy lang_code to result,
|
355
|
+
// converting every dash to an underscore, converting every character
|
356
|
+
// before the first dash or underscore to lower case, and converting
|
357
|
+
// every character after the first dash or underscore to upper
|
358
|
+
// case. If there is no dash or underscore, convert the entire string
|
359
|
+
// to lower case.
|
360
|
+
//
|
361
|
+
// REQUIRES: 'lang_code' must not be NULL. 'result' must not be NULL.
|
362
|
+
|
363
|
+
bool LanguageCodeToUnderscoreForm(const char* lang_code,
|
364
|
+
char* result,
|
365
|
+
int result_length);
|
366
|
+
|
367
|
+
//
|
368
|
+
// AlwaysPutInExpectedRestrict
|
369
|
+
// ---------------------------
|
370
|
+
//
|
371
|
+
// For Web pages in certain top-level domains, Web Search always
|
372
|
+
// applies a "country restrict". If 'tld' matches one of those, using
|
373
|
+
// a case-SENSITIVE comparison, set *expected_language to the Language
|
374
|
+
// most commonly found in that top-level domain and return true.
|
375
|
+
// Otherwise, set *expected_language to UNKNOWN_LANGUAGE and return false.
|
376
|
+
bool AlwaysPutInExpectedRestrict(const char *tld, Language *expected_language);
|
377
|
+
|
378
|
+
|
379
|
+
#endif // LANGUAGES_PUBLIC_LANGUAGES_H_
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'language_detection/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |gem|
|
7
|
+
gem.name = "language_detection"
|
8
|
+
gem.version = LanguageDetection::VERSION
|
9
|
+
gem.authors = ["Vojtech Hyza"]
|
10
|
+
gem.email = ["vhyza@vhyza.eu"]
|
11
|
+
gem.description = %q{Language detection}
|
12
|
+
gem.summary = %q{Wrapped Chrome's compact language detector}
|
13
|
+
gem.homepage = ""
|
14
|
+
|
15
|
+
gem.files = `git ls-files`.split($/)
|
16
|
+
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
|
+
gem.require_paths = ["lib"]
|
19
|
+
gem.extensions = ["ext/cld/extconf.rb"]
|
20
|
+
|
21
|
+
gem.add_dependency "ffi"
|
22
|
+
gem.add_dependency "hashr"
|
23
|
+
gem.add_dependency "rake"
|
24
|
+
|
25
|
+
gem.add_development_dependency "shoulda"
|
26
|
+
gem.add_development_dependency "mocha"
|
27
|
+
gem.add_development_dependency "turn"
|
28
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
String.class_eval { include LanguageDetection }
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require "language_detection/version"
|
2
|
+
require "ffi"
|
3
|
+
require "hashr"
|
4
|
+
|
5
|
+
module LanguageDetection
|
6
|
+
|
7
|
+
# TODO: Allow language hint
|
8
|
+
#
|
9
|
+
def self.perform(text, is_plain_text = false)
|
10
|
+
result = language_detection(text.to_s, is_plain_text)
|
11
|
+
|
12
|
+
language = parse_result(result, result.members - [:details])
|
13
|
+
language[:details] = []
|
14
|
+
|
15
|
+
details = FFI::Pointer.new(LanguageDetection::Detail, result[:details])
|
16
|
+
3.times do |i|
|
17
|
+
detail = parse_result(LanguageDetection::Detail.new(details[i]))
|
18
|
+
language[:details] << detail unless detail.code == 'un'
|
19
|
+
end
|
20
|
+
|
21
|
+
language
|
22
|
+
end
|
23
|
+
|
24
|
+
def language(is_plain_text = false)
|
25
|
+
LanguageDetection.perform(self.to_s, is_plain_text)
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def self.parse_result(result, members = result.members)
|
31
|
+
Hashr.new(Hash[ members.map {|member| [member.to_sym, result[member]]} ])
|
32
|
+
end
|
33
|
+
|
34
|
+
extend FFI::Library
|
35
|
+
|
36
|
+
class Detail < FFI::Struct
|
37
|
+
layout :name, :string,
|
38
|
+
:code, :string,
|
39
|
+
:percent, :int,
|
40
|
+
:score, :double
|
41
|
+
end
|
42
|
+
|
43
|
+
class Language < FFI::Struct
|
44
|
+
layout :name, :string,
|
45
|
+
:code, :string,
|
46
|
+
:reliable, :bool,
|
47
|
+
:text_bytes, :int,
|
48
|
+
:details, :pointer
|
49
|
+
end
|
50
|
+
|
51
|
+
ffi_lib File.expand_path("../../ext/cld/cld.so", __FILE__)
|
52
|
+
attach_function "language_detection","language_detection", [:buffer_in, :bool], Language.by_value
|
53
|
+
|
54
|
+
end
|
data/test/_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'bundler/setup'
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'shoulda'
|
5
|
+
require 'turn' unless ENV["TM_FILEPATH"] || ENV["CI"]
|
6
|
+
require 'mocha'
|
7
|
+
require File.join(File.expand_path('../../lib/language_detection.rb', __FILE__))
|
8
|
+
|
9
|
+
class Test::Unit::TestCase
|
10
|
+
|
11
|
+
def fixture_file(name)
|
12
|
+
File.read File.expand_path("../fixtures/#{name}", __FILE__)
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
af, aam skukuza die naam beteken hy wat skoonvee of hy wat alles onderstebo keer wysig bosveldkampe boskampe is kleiner afgeleë ruskampe wat oor min fasiliteite beskik daar is geen restaurante of winkels nie en slegs oornagbesoekers word toegelaat bateleur
|
2
|
+
am, ለመጠይቅ ወደ እስክንድርያ ላኩዋቸውና የእስክንድርያ ጳጳስ አቴናስዮስ ፍሬምንጦስን እራሳቸውን ሾመው ልከዋል ከዚያ እስከ ዓ ም ድረስ የኢትዮጵያ አቡነ
|
3
|
+
ar, احتيالية بيع أي حساب
|
4
|
+
az, a az qalıb breyn rinq intellektual oyunu üzrə yarışın zona mərhələləri keçirilib miq un qalıqlarının dənizdən çıxarılması davam edir məhəmməd peyğəmbərin karikaturalarını çap edən qəzetin baş redaktoru iş otağında ölüb
|
5
|
+
be, а друкаваць іх не было тэхнічна магчыма бліжэй за вільню тым самым часам нямецкае кіраўніцтва прапаноўвала апроч ўвядзення лацінкі яе
|
6
|
+
bg, а дума попада в състояние на изпитание ключовите думи с предсказана малко под то изискване на страниците за търсене в
|
7
|
+
bn, ংখ্যা নমুনায়ন বিন্যাস পরিসংখ্যানিক মডেল পরিসংখ্যানিক সিদ্ধান্ত ফাংশন পরিসংখ্যানিক
|
8
|
+
bo, གང ནི ཀུན ལ སྦྱར པ དང ཅན ལྡན བདག པོའི སྒྲ ག ད བ ས ན མ པ ང འ ར ལ མཐའ མེད པ བདག པོའི སྒྲ ལ པ ཉིད དོ མ མི མིན
|
9
|
+
sr, историја босне књ историја босне књ историја босне књ историја босне књ
|
10
|
+
ca, al final en un únic lloc nhorabona l correu electrònic està concebut com a eina de productivitat aleshores per què perdre el temps arxivant missatges per després intentar recordar on els veu desar i per què heu d eliminar missatges importants per l
|
11
|
+
chr, ᎠᎢᏍᎩ ᎠᏟᎶᏍᏗ ᏥᏄᏍᏛᎩ ᎦᎫᏍᏛᏅᎯ ᎾᎥᎢ
|
12
|
+
cs, a akci opakujte film uložen vykreslit gmail tokio smazat obsah adresáře nelze načíst systémový profil jednotky smoot okud používáte pro určení polokoule značky z západ nebo v východ používejte nezáporné hodnoty zeměpisné délky nelze
|
13
|
+
cy, a chofrestru eich cyfrif ymwelwch a unwaith i chi greu eich cyfrif mi fydd yn cael ei hysbysu o ch cyfeiriad ebost newydd fel eich bod yn gallu cadw mewn cysylltiad drwy gmail os nad ydych chi wedi clywed yn barod am gmail mae n gwasanaeth gwebost
|
14
|
+
da, a z tallene og punktummer der er tilladte log ud angiv den ønskede adgangskode igen november gem personlige oplysninger kontrolspørgsmål det sidste tegn i dit brugernavn skal være et bogstav a z eller tal skriv de tegn du kan se i billedet nedenfor
|
15
|
+
de, abschnitt ordner aktivieren werden die ordnereinstellungen im farbabschnitt deaktiviert öchten sie wirklich fortfahren eldtypen angeben optional n diesem schritt geben sie für jedesfeld aus dem datenset den typ an ieser schritt ist optional eldtypen
|
16
|
+
dv, ހިންދީ ބަހުން ވާހަކަ ދައްކާއިރު ދެވަނަ ބަހެއްގެ ގޮތުގައާއި އެނޫން ގޮތްގޮތުން ހިންދީ ބަހުން ވާހަކަ ދައްކާ މީހުންގެ އަދަދު މިލިއަނަށް
|
17
|
+
el, ή αρνητική αναζήτηση λέξης κλειδιού καταστήστε τις μεμονωμένες λέξεις κλειδιά περισσότερο στοχοθετημένες με τη μετατροπή τους σε
|
18
|
+
en, a backup credit card by visiting your billing preferences page or visit the adwords help centre for more details https adwords google com support bin answer py answer hl en we were unable to process the payment of for your outstanding google adwords
|
19
|
+
es, a continuación haz clic en el botón obtener ruta también puedes desplazarte hasta el final de la página para cambiar tus opciones de búsqueda gráfico y detalles ésta es una lista de los vídeos que te recomendamos nuestras recomendaciones se basan
|
20
|
+
et, a niipea kui sinu maksimaalne igakuine krediidi limiit on meie poolt heaks kiidetud on sinu kohustuseks see krediidilimiit
|
21
|
+
eu, a den eraso bat honen kontra hortaz eragiketa bakarrik behar dituen eraso batek aes apurtuko luke nahiz eta oraingoz eraso bideraezina izan gaur egungo teknologiaren mugak direla eta oraingoz kezka hauek alde batera utzi daitezke orain arteko indar
|
22
|
+
fa, آب خوردن عجله می کردند به جای باز ی کتک کاری می کردند و همه چيز مثل قبل بود فقط من ماندم و يک دنيا حرف و انتظار تا عاقبت رسيد احضاريه ی ای با
|
23
|
+
fi, a joilla olet käynyt tämä kerro meille kuka ä olet ei tunnistettavia käyttötietoja kuten virheraportteja käytetään google desktopin parantamiseen etsi näyttää mukautettuja uutisia google desktop keskivaihto leikkaa voit kaksoisnapsauttaa
|
24
|
+
fr, a accès aux collections et aux frontaux qui lui ont été attribués il peut consulter et modifier ses collections et exporter des configurations de collection toutefois il ne peut pas créer ni supprimer des collections enfin il a accès aux fonctions
|
25
|
+
ga, a bhfuil na focail go léir i do cheist le fáil orthu ní gá ach focail breise a chur leis na cinn a cuardaíodh cheana chun an cuardach a bheachtú nó a chúngú má chuirtear focal breise isteach aimseofar fo aicme ar leith de na torthaí a fuarthas
|
26
|
+
gu, આના પરિણામ પ્રમાણસર ફોન્ટ અવતરણ ચિન્હવાળા પાઠને છુપાવો બધા સમૂહો શોધાયા હાલનો જ સંદેશ વિષયની
|
27
|
+
hi, ं ऐडवर्ड्स विज्ञापनों के अनुभव पर आधारित हैं और इनकी मदद से आपको अपने विज्ञापनों का अधिकतम लाभ
|
28
|
+
hr, venski poljski kašupski dolnolužički gornjolužički češki slovački istočnoslavenski ruski bjeloruski ukrajinski rusinski i južnoslavenski slovenski hrvatski srpski crnogorski bošnjački makedonski i bugarski južnoslavenski jezici
|
29
|
+
ht, ak pitit tout sosyete a chita se pou sa leta dwe pwoteje yo nimewo leta fèt pou li pwoteje tout paran ak pitit nan peyi a menm jan kit paran yo marye kit yo pa marye tout manman ki fè pitit leta fèt pou ba yo konkoul menm jan tou pou timoun piti ak pou
|
30
|
+
hu, a felhasználóim a google azonosító szöveget ikor látják a felhasználóim a google azonosító szöveget felhasználók a google azonosító szöveget fogják látni minden tranzakció után ha a vásárlását regisztrációját oldalunk
|
31
|
+
hy, ա յ եվ նա հիացած աչքերով նայում է հինգհարկանի շենքի տարօրինակ փոքրիկ քառակուսի պատուհաններին դեռ մենք շատ ենք հետամնաց ասում է նա այսպես է
|
32
|
+
ms, account anda dana anda belum kami terima berikan kami waktu sedikit lagi kami masih menunggu penerimaan transfer bank anda jika anda belum mengirimkan dana anda silakan kunjungi lakukan transfer bank ikuti instruksi pada halaman untuk memulai proses
|
33
|
+
is, a afköst leitarorða þinna leitarorð neikvæð leitarorð auglýsingahópa byggja upp aðallista yfir ný leitarorð fyrir auglýsingahópana og skoða ítarleg gögn um árangur leitarorða eins og samkeppni auglýsenda og leitarmagn er krafist notkun
|
34
|
+
it, a causa di un intervento di manutenzione del sistema fino alle ore circa ora legale costa del pacifico del novembre le campagne esistenti continueranno a essere pubblicate come di consueto anche durante questo breve periodo di inattività ci scusiamo per
|
35
|
+
iu, ᐃᑯᒪᒻᒪᑦ ᕿᓈᖏᓐᓇᓲᖑᒻᒪᑦ ᑎᑎᖅᑕᓕᒫᖅᓃᕕᑦ ᑎᑦᕆᐊᑐᓐᖏᑦᑕᑎᑦ ᑎᑎᖅᑕᑉᐱᑦ ᓯᕗᓂᖓᓂ ᑎᑎᖅᖃᖅ ᑎᑎᕆᐊᑐᓐᖏᑕᐃᑦ ᕿᓂᓲᖑᔪᒍᑦ ᑎᑎᖅᑕᓕᒫᖅᓃᕕᑦ
|
36
|
+
he, או לערוך את העדפות ההפצה אנא עקוב אחרי השלבים הבאים כנס לחשבון האישי שלך ב
|
37
|
+
ja, このペ ジでは アカウントに指定された予算の履歴を一覧にしています それぞれの項目には 予算額と特定期間のステ タスが表示されます 現在または今後の予算を設定するには
|
38
|
+
ka, ა ბირთვიდან მიღებული ელემენტი მენდელეევის პერიოდულ სიტემაში გადაინაცვლებს ორი უჯრით
|
39
|
+
km, ក ខ គ ឃ ង ច ឆ ជ ឈ ញ ដ ឋ ឌ ឍ ណ ត ថ ទ ធ ន ប ផ ព ភ ម យ រ ល វ ស ហ ឡ អ ឥ ឦ ឧ ឪ ឫ ឬ ឯ ឱ ទាំងអស់
|
40
|
+
kn, ಂಠಯ್ಯನವರು ತುಮಕೂರು ಜಿಲ್ಲೆಯ ಚಿಕ್ಕನಾಯಕನಹಳ್ಳಿ ತಾಲ್ಲೂಕಿನ ತೀರ್ಥಪುರ ವೆಂಬ ಸಾಧಾರಣ ಹಳ್ಳಿಯ ಶ್ಯಾನುಭೋಗರ
|
41
|
+
ko, 개별적으로 리포트 액세스 권한을 부여할 수 있습니다 액세스 권한 부여사용자에게 프로필 리포트에 액세스할 수 있는 권한을 부여하시려면 가용 프로필 상자에서 프로필 이름을 선택한 다음
|
42
|
+
lo, ກຫາທົ່ວທັງເວັບ ແລະໃນເວັບໄຮ້ສາຍ ທຳອິດໃຫ້ທຳການຊອກຫາກ່ອນ ຈາກນັ້ນ ໃຫ້ກົດປຸ່ມເມນູ ໃນໜ້າຜົນໄດ້
|
43
|
+
lt, a išsijungia mano idėja dėl geriausio laiko po pastarųjų savo santykių pasimokiau penki dalykai be kurių negaliu gyventi mano miegamajame tu surasi ideali pora išsilavinimas aukštoji mokykla koledžas universitetas pagrindinis laipsnis metai
|
44
|
+
lv, a gadskārtējā izpārdošana slēpošana jāņi atlaide izmaiņas trafikā kas saistītas ar sezonas izpārdošanu speciālajām atlaidēm u c ir parastas un atslēgvārdi kas ir populāri noteiktos laika posmos šajā laikā saņems lielāku klikšķu
|
45
|
+
mk, гласовите коалицијата на вмро дпмне како партија со најмногу освоени гласови ќе добие евра а на сметката на коализијата за македонија
|
46
|
+
ml, ം അങ്ങനെ ഞങ്ങള് അവരുടെ മുമ്പില് നിന്നു ഔടും ഉടനെ നിങ്ങള് പതിയിരിപ്പില് നിന്നു എഴുന്നേറ്റു
|
47
|
+
ms, bilik sebelah berkata julai pada pm ladymariah hmm sume ni terpulang kepada individu mungkin anda bernasib baik selama ini dalam membeli hp yang bagus deli berkata julai pada pm walaupun bukan bahsa baku tp tetap bahasa melayu kan perubahan boleh dibuat
|
48
|
+
mt, ata ikteb messaġġ lil indirizzi differenti billi tagħżilhom u tagħfas il buttuna ikteb żid numri tfittxijja tal kotba mur print home kotba minn pagni ghal pagna minn ghall ktieb ta aċċessa stieden habib iehor grazzi it tim tal gruppi google
|
49
|
+
nl, a als volgt te werk om een configuratiebestand te maken sitemap gen py ebruik filters om de s op te geven die moeten worden toegevoegd of uitgesloten op basis van de opmaaktaal elke sitemap mag alleen de s bevatten voor een bepaalde opmaaktaal dit
|
50
|
+
nb, a er obligatorisk tidsforskyvning plassering av katalogsøk planinformasjon loggfilbane gruppenavn kontoinformasjon passord domene gruppeinformasjon alle kampanjesporing alternativ bruker grupper oppgaveplanlegger oppgavehistorikk kontosammendrag antall
|
51
|
+
or, ଅକ୍ଟୋବର ଡିସେମ୍ବର
|
52
|
+
pa, ਂ ਦਿਨਾਂ ਵਿਚ ਭਾਈ ਸਾਹਿਬ ਦੀ ਬੁੱਚੜ ਗੋਬਿੰਦ ਰਾਮ ਨਾਲ ਅੜਫਸ ਚੱਲ ਰਹੀ ਸੀ ਗੋਬਿੰਦ ਰਾਮ ਨੇ ਭਾਈ ਸਾਹਿਬ ਦੀਆਂ ਭੈਣਾ
|
53
|
+
pl, a australii będzie widział inne reklamy niż użytkownik z kanady kierowanie geograficzne sprawia że reklamy są lepiej dopasowane do użytkownika twojej strony oznacza to także że możesz nie zobaczyć wszystkich reklam które są wyświetlane na
|
54
|
+
pt, a abit prevê que a entrada desses produtos estrangeiros no mercado têxtil e vestuário do brasil possa reduzir os preços em cerca de a partir de má notícia para os empresários que terão que lutar para garantir suas margens de lucro mas boa notícia
|
55
|
+
ro, a anunţurilor reţineţi nu plătiţi pentru clicuri sau impresii ci numai atunci când pe site ul dvs survine o acţiune dorită site urile negative nu pot avea uri de destinaţie daţi instrucţiuni societăţii dvs bancare sau constructoare să
|
56
|
+
ru, а неправильный формат идентификатора дн назад
|
57
|
+
si, අනුරාධ මිහිඳුකුල නමින් සකුරා ට ලිපියක් තැපෑලෙන් එවා තිබුණා කි ් රස්ටි ෂෙල්ටන් ප ් රනාන්දු ද
|
58
|
+
sk, a aktivovať reklamnú kampaň ak chcete kampaň pred spustením ešte prispôsobiť uložte ju ako šablónu a pokračujte v úprave vyberte si jednu z možností nižšie a kliknite na tlačidlo uložiť kampaň nastavenia kampane môžete ľubovoľne
|
59
|
+
sl, adsense stanje prijave za google adsense google adsense račun je bil začasno zamrznjen pozdravljeni hvala za vaše zanimanje v google adsense po pregledu vaše prijavnice so naši strokovnjaki ugotovili da spletna stran ki je trenutno povezana z vašim
|
60
|
+
sr, балчак балчак на мапи србије уреди демографија у насељу балчак живи пунолетна становника а просечна старост становништва износи година
|
61
|
+
hr, autonomnih pokrajina saveznim zakonom može se propisati poseban sastav organizacija i delokrug saveta za poslove narodne odbrane članove saveta federacije bira na predlog predsedništva savezna skupština iz reda društveno političkih i drugih javnih
|
62
|
+
hr, savjet pobjeda a radi bržeg rada pošto rom radi sporije nego ram izvorni rom se isključuje a dio ram a se rezerviše te se u njega ne ploča procesor ram memorija grafička kartica zvučna kartica modem mrežna kartica napojna jedinica uređaji za pohranjivanje
|
63
|
+
sv, a bort objekt från google desktop post äldst meny öretag dress etaljer alternativ för vad är inne yaste google skrivbord plugin program för nyheter google visa nyheter som är anpassade efter de artiklar som du läser om du till exempel läser
|
64
|
+
sw, a ujumbe mpya jumla unda tafuta na angalia vikundi vya kujadiliana na kushiriki mawazo iliyopangwa kwa tarehe watumiaji wapya futa orodha hizi lugha hoja vishikanisho vilivyo dhaminiwa ujumbe sanaa na tamasha toka udhibitisho wa neno kwa haraka fikia
|
65
|
+
syr, ܐܕܪܝܣ ܓܛܘ ܫܘܪܝܐ ܡܢ ܦܪܢܣܐ ܡܢ ܐܣܦܢܝܐ ܚܐܪܘܬܐ ܒܐܕܪ ܒܢܝܣܢ ܫܛܝܚܘܬܐ ܟܠܢܝܐ ܡܝ̈ܐ ܒܥܠܡܐ
|
66
|
+
ta, அங்கு ராஜேந்திர சோழனால் கட்டப்பட்ட பிரம்மாண்டமான சிவன் கோவில் ஒன்றும் உள்ளது தொகு
|
67
|
+
te, ఁ దనర జయించిన తత్వ మరసి చూడఁ దాన యగును రాజయోగి యిట్లు తేజరిల్లుచు నుండు విశ్వదాభిరామ వినర వేమ
|
68
|
+
th, กฏในการค้นหา หรือหน้าเนื้อหา หากท่านเลือกลงโฆษณา ท่านอาจจะปรับต้องเพิ่มงบประมาณรายวันตา
|
69
|
+
fil, a na ugma sa google ay nakaka bantog sa gitna nang kliks na nangyayari sa pamamagitan nang ordinaryong paggagamit at sa kliks na likha nang pandaraya o hindi tunay na paggamit bunga nito nasasala namin ang mga kliks na hindi kailangan o hindi gusto nang
|
70
|
+
tr, a ayarlarınızı görmeniz ve yönetmeniz içindir eğer kampanyanız için günlük bütçenizi gözden geçirebileceğiniz yeri arıyorsanız kampanya yönetimi ne gidin kampanyanızı seçin ve kampanya ayarlarını düzenle yi tıklayın sunumu
|
71
|
+
uk, а більший бюджет щоб забезпечити собі максимум прибутків від переходів відстежуйте свої об яви за датою географічним розташуванням
|
72
|
+
vi, adsense cho nội dung nhà cung cấp dịch vụ di động xác minh tín dụng thay đổi nhãn kg các ô xem chi phí cho từ chối các đơn đặt hàng dạng cấp dữ liệu ác minh trang web của bạn để xem
|
73
|
+
yi, און פאנטאזיע ער איז באקאנט צים מערסטן פאר זיינע באַלאַדעס ער האָט געוווינט אין ווארשע יעס פאריס ליווערפול און לאנדאן סוף כל סוף איז ער
|
74
|
+
zh, 产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 要更改您的国家 地区 请在此表的最上端更改您的
|
75
|
+
zh-TW, 之前為 帳單交易作業區 已變更 廣告內容 之前為 銷售代表 之前為 張貼日期為 百分比之前為 合約 為 目標對象條件已刪除 結束日期之前為
|
76
|
+
en, becoose a ve a leemit qooereees tu vurds um gesh dee bork bork nu peges vere a fuoond cunteeening is a fery cummun vurd und ves nut inclooded in yuoor seerch zee ooperetur is unnecessery ve a incloode a ell seerch terms by deffoolt um de hur de hur de hur
|
77
|
+
en, a diffewent type of seawch send feedback about google wiwewess seawch to wap google com wesuwts found on de entiwe web fow wesuwts found on de mobiwe web fow de functionawity of de toolbar up button has been expanded swightwy it now considews fow exampwe
|
78
|
+
en, b x z un b e t und rs n a dr ss p as ry an th r a dr ss ry us n a l ss mb gu us c ti n l ke a z p c d n a dr ss nt r d pl as en r n a dr ss y ur s ar h f r n ar d d n t m tch ny l c ti n w th n m l s nd m r r at d p g s th l c ti ns b l w w r ut m t ca y
|
79
|
+
ms, sukiyaki wikipedia bahasa melayu ensiklopedia bebas sukiyaki dari wikipedia bahasa melayu ensiklopedia bebas lompat ke navigasi gelintar sukiyaki sukiyaki hirisan tipis daging lembu sayur sayuran dan tauhu di dalam periuk besi yang dimasak di atas meja makan dengan cara rebusan sukiyaki dimakan dengan mence
|
80
|
+
id, sukiyaki wikipedia indonesia ensiklopedia bebas berbahasa bebas berbahasa indonesia langsung ke navigasi cari untuk pengertian lain dari sukiyaki lihat sukiyaki irisan tipis daging sapi sayur sayuran dan tahu di dalam panci besi yang dimasak di atas meja makan dengan cara direbus sukiyaki dimakan dengan mence
|