compact_enc_det 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ext/compact_enc_det/compact_enc_det/CMakeLists.txt +103 -0
- data/ext/compact_enc_det/compact_enc_det/LICENSE +202 -0
- data/ext/compact_enc_det/compact_enc_det/README.md +46 -0
- data/ext/compact_enc_det/compact_enc_det/autogen.sh +74 -0
- data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det.cc +5719 -0
- data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det.h +83 -0
- data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_fuzz_test.cc +54 -0
- data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_generated_tables.h +6326 -0
- data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_generated_tables2.h +856 -0
- data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_hint_code.cc +169 -0
- data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_hint_code.h +45 -0
- data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_unittest.cc +5260 -0
- data/ext/compact_enc_det/compact_enc_det/compact_enc_det/detail_head_string.inc +152 -0
- data/ext/compact_enc_det/compact_enc_det/util/basictypes.h +331 -0
- data/ext/compact_enc_det/compact_enc_det/util/case_insensitive_hash.h +88 -0
- data/ext/compact_enc_det/compact_enc_det/util/commandlineflags.h +39 -0
- data/ext/compact_enc_det/compact_enc_det/util/encodings/encodings.cc +891 -0
- data/ext/compact_enc_det/compact_enc_det/util/encodings/encodings.h +299 -0
- data/ext/compact_enc_det/compact_enc_det/util/encodings/encodings.pb.h +181 -0
- data/ext/compact_enc_det/compact_enc_det/util/encodings/encodings_unittest.cc +34 -0
- data/ext/compact_enc_det/compact_enc_det/util/languages/languages.cc +349 -0
- data/ext/compact_enc_det/compact_enc_det/util/languages/languages.h +381 -0
- data/ext/compact_enc_det/compact_enc_det/util/languages/languages.pb.h +191 -0
- data/ext/compact_enc_det/compact_enc_det/util/logging.h +25 -0
- data/ext/compact_enc_det/compact_enc_det/util/port.h +53 -0
- data/ext/compact_enc_det/compact_enc_det/util/string_util.h +61 -0
- data/ext/compact_enc_det/compact_enc_det/util/varsetter.h +66 -0
- data/ext/compact_enc_det/compact_enc_det.cc +100 -0
- data/ext/compact_enc_det/extconf.rb +20 -0
- data/lib/compact_enc_det/version.rb +3 -0
- data/lib/compact_enc_det.rb +2 -0
- metadata +106 -0
@@ -0,0 +1,381 @@
|
|
1
|
+
// Copyright 2016 Google Inc.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
////////////////////////////////////////////////////////////////////////////////
|
16
|
+
|
17
|
+
#ifndef UTIL_LANGUAGES_LANGUAGES_H_
|
18
|
+
#define UTIL_LANGUAGES_LANGUAGES_H_
|
19
|
+
|
20
|
+
// This interface defines the Language enum and functions that depend
|
21
|
+
// only on Language values.
|
22
|
+
|
23
|
+
// A hash-function for Language, hash<Language>, is defined in
|
24
|
+
// i18n/languages/public/languages-hash.h
|
25
|
+
|
26
|
+
#ifndef SWIG
|
27
|
+
// Language enum defined in languages.proto
|
28
|
+
// Also description on how to add languages.
|
29
|
+
#include "util/languages/languages.pb.h"
|
30
|
+
|
31
|
+
#else
|
32
|
+
|
33
|
+
// TODO: Include a header containing swig-compatible enum.
|
34
|
+
|
35
|
+
#endif
|
36
|
+
|
37
|
+
const int kNumLanguages = NUM_LANGUAGES;
|
38
|
+
|
39
|
+
// Return the default language (ENGLISH).
|
40
|
+
Language default_language();
|
41
|
+
|
42
|
+
|
43
|
+
// *******************************************
|
44
|
+
// Language predicates
|
45
|
+
// IsValidLanguage()
|
46
|
+
// IS_LANGUAGE_UNKNOWN()
|
47
|
+
// IsCJKLanguage()
|
48
|
+
// IsChineseLanguage()
|
49
|
+
// IsNorwegianLanguage()
|
50
|
+
// IsPortugueseLanguage()
|
51
|
+
// IsRightToLeftLanguage()
|
52
|
+
// IsMaybeRightToLeftLanguage()
|
53
|
+
// IsSameLanguage()
|
54
|
+
// IsScriptRequiringLongerSnippets()
|
55
|
+
// *******************************************
|
56
|
+
|
57
|
+
// IsValidLanguage
|
58
|
+
// ===============
|
59
|
+
//
|
60
|
+
// Function to check if the input is within range of the Language enum. If
|
61
|
+
// IsValidLanguage(lang) returns true, it is safe to call
|
62
|
+
// static_cast<Language>(lang).
|
63
|
+
//
|
64
|
+
inline bool IsValidLanguage(int lang) {
|
65
|
+
return ((lang >= 0) && (lang < kNumLanguages));
|
66
|
+
}
|
67
|
+
|
68
|
+
// Return true if the language is "unknown". (This function was
|
69
|
+
// previously a macro, hence the spelling in all caps.)
|
70
|
+
//
|
71
|
+
inline bool IS_LANGUAGE_UNKNOWN(Language lang) {
|
72
|
+
return lang == TG_UNKNOWN_LANGUAGE || lang == UNKNOWN_LANGUAGE;
|
73
|
+
}
|
74
|
+
|
75
|
+
// IsCJKLanguage
|
76
|
+
// -------------
|
77
|
+
//
|
78
|
+
// This function returns true if the language is either Chinese
|
79
|
+
// (simplified or traditional), Japanese, or Korean.
|
80
|
+
bool IsCJKLanguage(Language lang);
|
81
|
+
|
82
|
+
// IsChineseLanguage
|
83
|
+
// -----------------
|
84
|
+
//
|
85
|
+
// This function returns true if the language is either Chinese
|
86
|
+
// (simplified or traditional)
|
87
|
+
bool IsChineseLanguage(Language lang);
|
88
|
+
|
89
|
+
// IsNorwegianLanguage
|
90
|
+
// --------------------
|
91
|
+
//
|
92
|
+
// This function returns true if the language is any of the Norwegian
|
93
|
+
// (regular or Nynorsk).
|
94
|
+
bool IsNorwegianLanguage(Language lang);
|
95
|
+
|
96
|
+
// IsPortugueseLanguage
|
97
|
+
// --------------------
|
98
|
+
//
|
99
|
+
// This function returns true if the language is any of the Portuguese
|
100
|
+
// languages (regular, Portugal or Brazil)
|
101
|
+
bool IsPortugueseLanguage(Language lang);
|
102
|
+
|
103
|
+
// IsSameLanguage
|
104
|
+
// --------------
|
105
|
+
//
|
106
|
+
// WARNING: This function provides only a simple test on the values of
|
107
|
+
// the two Language arguments. It returns false if either language is
|
108
|
+
// invalid. It returns true if the language arguments are equal, or
|
109
|
+
// if they are both Chinese languages, both Norwegian languages, or
|
110
|
+
// both Portuguese languages, as defined by IsChineseLanguage,
|
111
|
+
// IsNorwegianLanguage, and IsPortugueseLanguage. Otherwise it returns
|
112
|
+
// false.
|
113
|
+
bool IsSameLanguage(Language lang1, Language lang2);
|
114
|
+
|
115
|
+
|
116
|
+
// IsRightToLeftLanguage
|
117
|
+
// ---------------------
|
118
|
+
//
|
119
|
+
// This function returns true if the language is only written right-to-left
|
120
|
+
// (E.g., Hebrew, Arabic, Persian etc.)
|
121
|
+
//
|
122
|
+
// IMPORTANT NOTE: Technically we're talking about scripts, not languages.
|
123
|
+
// There are languages that can be written in more than one script.
|
124
|
+
// Examples:
|
125
|
+
// - Kurdish and Azeri ('AZERBAIJANI') can be written left-to-right in
|
126
|
+
// Latin or Cyrillic script, and right-to-left in Arabic script.
|
127
|
+
// - Sindhi and Punjabi are written in different scripts, depending on
|
128
|
+
// region and dialect.
|
129
|
+
// - Turkmen used an Arabic script historically, but not any more.
|
130
|
+
// - Pashto and Uyghur can use Arabic script, but use a Roman script
|
131
|
+
// on the Internet.
|
132
|
+
// - Kashmiri and Urdu are written either with Arabic or Devanagari script.
|
133
|
+
//
|
134
|
+
// This function only returns true for languages that are always, unequivocally
|
135
|
+
// written in right-to-left script.
|
136
|
+
//
|
137
|
+
// TODO: If we want to do anything special with multi-script languages
|
138
|
+
// we should create new 'languages' for each language+script, as we do for
|
139
|
+
// traditional vs. simplified Chinese. However most such languages are rare in
|
140
|
+
// use and even rarer on the web, so this is unlikely to be something we'll
|
141
|
+
// be concerned with for a while.
|
142
|
+
bool IsRightToLeftLanguage(Language lang);
|
143
|
+
|
144
|
+
// IsMaybeRightToLeftLanguage
|
145
|
+
// --------------------------
|
146
|
+
//
|
147
|
+
// This function returns true if the language may appear on the web in a
|
148
|
+
// right-to-left script (E.g., Hebrew, Arabic, Persian, Urdu, Kurdish, etc.)
|
149
|
+
//
|
150
|
+
// NOTE: See important notes under IsRightToLeftLanguage(...).
|
151
|
+
//
|
152
|
+
// This function returns true for languages that *may* appear on the web in a
|
153
|
+
// right-to-left script, even if they may also appear in a left-to-right
|
154
|
+
// script.
|
155
|
+
//
|
156
|
+
// This function should typically be used in cases where doing some work on
|
157
|
+
// left-to-right text would be OK (usually a no-op), and this function is used
|
158
|
+
// just to cut down on unnecessary work on regular, LTR text.
|
159
|
+
bool IsMaybeRightToLeftLanguage(Language lang);
|
160
|
+
|
161
|
+
// IsScriptRequiringLongerSnippets
|
162
|
+
// --------------------
|
163
|
+
//
|
164
|
+
// This function returns true if the script characteristics require longer
|
165
|
+
// snippet length (Devanagari, Bengali, Gurmukhi,
|
166
|
+
// Gujarati, Oriya, Tamil, Telugu, Kannada, Malayalam).
|
167
|
+
// COMMENTED OUT TO REDUCE DEPENDENCIES ON GOOGLE3 CODE
|
168
|
+
// bool IsScriptRequiringLongerSnippets(UnicodeScript script);
|
169
|
+
|
170
|
+
|
171
|
+
// *******************************************
|
172
|
+
// LANGUAGE NAMES
|
173
|
+
//
|
174
|
+
// This interface defines a standard name for each valid Language,
|
175
|
+
// and a standard name for invalid languages. Some language names use all
|
176
|
+
// uppercase letters, but others use mixed case.
|
177
|
+
// LanguageName() [Language to name]
|
178
|
+
// LanguageEnumName() [language to enum name]
|
179
|
+
// LanguageFromName() [name to Language]
|
180
|
+
// default_language_name()
|
181
|
+
// invalid_language_name()
|
182
|
+
// *******************************************
|
183
|
+
|
184
|
+
// Given a Language, returns its standard name.
|
185
|
+
// Return invalid_language_name() if the language is invalid.
|
186
|
+
const char* LanguageName(Language lang);
|
187
|
+
|
188
|
+
// Given a Language, return the name of the enum constant for that
|
189
|
+
// language. In all but a few cases, this is the same as its standard
|
190
|
+
// name. For example, LanguageName(CHINESE) returns "Chinese", but
|
191
|
+
// LanguageEnumName(CHINESE) returns "CHINESE". This is intended for
|
192
|
+
// code that is generating C++ code, where the enum constant is more
|
193
|
+
// useful than its integer value. Return "NUM_LANGUAGES" if
|
194
|
+
// the language is invalid.
|
195
|
+
const char* LanguageEnumName(Language lang);
|
196
|
+
|
197
|
+
// The maximum length of a standard language name.
|
198
|
+
const int kMaxLanguageNameSize = 50;
|
199
|
+
|
200
|
+
// The standard name for the default language.
|
201
|
+
const char* default_language_name();
|
202
|
+
|
203
|
+
// The standard name for all invalid languages.
|
204
|
+
const char* invalid_language_name();
|
205
|
+
|
206
|
+
// If lang_name matches the standard name of a Language, using a
|
207
|
+
// case-insensitive comparison, set *language to that Language and
|
208
|
+
// return true.
|
209
|
+
// Otherwise, set *language to UNKNOWN_LANGUAGE and return false.
|
210
|
+
//
|
211
|
+
// For backwards compatibility, "HATIAN_CREOLE" is allowed as a name
|
212
|
+
// for HAITIAN_CREOLE, and "QUECHAU" is allowed as a name for QUECHUA.
|
213
|
+
// For compatibility with LanguageEnumName, "UNKNOWN_LANGUAGE" is allowed
|
214
|
+
// as a name for UNKNOWN_LANGUAGE (the return value is true in this case,
|
215
|
+
// as it is for "Unknown"), and "CHINESE_T" is allowed as a name for
|
216
|
+
// CHINESE_T (i.e., a synonym for "ChineseT").
|
217
|
+
//
|
218
|
+
// REQUIRES: language must not be NULL.
|
219
|
+
//
|
220
|
+
bool LanguageFromName(const char* lang_name, Language *language);
|
221
|
+
|
222
|
+
|
223
|
+
|
224
|
+
// *******************************************
|
225
|
+
// LANGUAGE CODES
|
226
|
+
//
|
227
|
+
// This interface defines a standard code for each valid language, and
|
228
|
+
// a standard code for invalid languages. These are derived from ISO codes,
|
229
|
+
// with some Google additions.
|
230
|
+
// LanguageCode()
|
231
|
+
// default_language_code()
|
232
|
+
// invalid_language_code()
|
233
|
+
// LanguageCodeWithDialects()
|
234
|
+
// LanguageCodeISO639_1()
|
235
|
+
// LanguageCodeISO639_2()
|
236
|
+
// *******************************************
|
237
|
+
|
238
|
+
// Given a Language, return its standard code. There are Google-specific codes:
|
239
|
+
// For CHINESE_T, return "zh-TW".
|
240
|
+
// For TG_UNKNOWN_LANGUAGE, return "ut".
|
241
|
+
// For UNKNOWN_LANGUAGE, return "un".
|
242
|
+
// For PORTUGUESE_P, return "pt-PT".
|
243
|
+
// For PORTUGUESE_B, return "pt-BR".
|
244
|
+
// For LIMBU, return "sit-NP".
|
245
|
+
// For CHEROKEE, return "chr".
|
246
|
+
// For SYRIAC, return "syr".
|
247
|
+
// Otherwise return the ISO 639-1 two-letter language code for lang.
|
248
|
+
// If lang is invalid, return invalid_language_code().
|
249
|
+
//
|
250
|
+
// NOTE: See the note below about the codes for Chinese languages.
|
251
|
+
//
|
252
|
+
const char* LanguageCode(Language lang);
|
253
|
+
|
254
|
+
// The maximum length of a language code.
|
255
|
+
const int kMaxLanguageCodeSize = 50;
|
256
|
+
|
257
|
+
// The standard code for the default language.
|
258
|
+
const char* default_language_code();
|
259
|
+
|
260
|
+
// The standard code for all invalid languages.
|
261
|
+
const char* invalid_language_code();
|
262
|
+
|
263
|
+
|
264
|
+
// --------------------------------------------
|
265
|
+
// NOTE: CHINESE LANGUAGE CODES
|
266
|
+
//
|
267
|
+
// There are three functions that return codes for Chinese languages.
|
268
|
+
// LanguageCode(lang) and LanguageCodeWithDialects(lang) are defined here.
|
269
|
+
// LanguageCode(lang, encoding) is defined in i18n/encodings.lang_enc.h.
|
270
|
+
// The following list shows the different results.
|
271
|
+
//
|
272
|
+
// LanguageCode(CHINESE) returns "zh"
|
273
|
+
// LanguageCode(CHINESE_T) returns "zh-TW".
|
274
|
+
//
|
275
|
+
// LanguageCodeWithDialects(CHINESE) returns "zh-CN".
|
276
|
+
// LanguageCodeWithDialects(CHINESE_T) returns "zh-TW".
|
277
|
+
//
|
278
|
+
// LanguageCode(CHINESE_T, <any encoding>) returns "zh-TW".
|
279
|
+
// LanguageCode(CHINESE, CHINESE_BIG5) returns "zh-TW".
|
280
|
+
// LanguageCode(CHINESE, <any other encoding>) returns "zh-CN".
|
281
|
+
//
|
282
|
+
// --------------------------------------------
|
283
|
+
|
284
|
+
// LanguageCodeWithDialects
|
285
|
+
// ------------------------
|
286
|
+
//
|
287
|
+
// If lang is CHINESE, return "zh-CN". Otherwise return LanguageCode(lang).
|
288
|
+
const char* LanguageCodeWithDialects(Language lang);
|
289
|
+
|
290
|
+
// LanguageCodeISO639_1
|
291
|
+
// --------------------
|
292
|
+
//
|
293
|
+
// Return the ISO 639-1 two-letter language code for lang.
|
294
|
+
// Return invalid_language_code() if lang is invalid or does not have
|
295
|
+
// an ISO 639-1 two-letter language code.
|
296
|
+
const char* LanguageCodeISO639_1(Language lang);
|
297
|
+
|
298
|
+
// LanguageCodeISO639_2
|
299
|
+
// --------------------
|
300
|
+
//
|
301
|
+
// Return the ISO 639-2 three-letter language code for lang.
|
302
|
+
// Return invalid_language_code() if lang is invalid or does not have
|
303
|
+
// an ISO 639-2 three-letter language code.
|
304
|
+
const char* LanguageCodeISO639_2(Language lang);
|
305
|
+
|
306
|
+
// LanguageFromCode
|
307
|
+
// ----------------
|
308
|
+
//
|
309
|
+
// If lang_code matches the code for a Language, using a case-insensitive
|
310
|
+
// comparison, set *lang to that Language and return true.
|
311
|
+
// Otherwise, set *lang to UNKNOWN_LANGUAGE and return false.
|
312
|
+
//
|
313
|
+
// lang_code can be an ISO 639-1 (two-letter) code, an ISO 639-2
|
314
|
+
// (three-letter) code, or a Google-specific code (see LanguageCode).
|
315
|
+
//
|
316
|
+
// Certain language-code aliases are also allowed:
|
317
|
+
// For "zh-cn" and "zh_cn", set *lang to CHINESE.
|
318
|
+
// For "zh-tw" and "zh_tw", set *lang to CHINESE_T.
|
319
|
+
// For "he", set *lang to HEBREW.
|
320
|
+
// For "in", set *lang to INDONESIAN.
|
321
|
+
// For "ji", set *lang to YIDDISH.
|
322
|
+
// For "fil", set *lang to TAGALOG.
|
323
|
+
//
|
324
|
+
// REQUIRES: 'language' must not be NULL.
|
325
|
+
bool LanguageFromCode(const char* lang_code, Language *language);
|
326
|
+
|
327
|
+
|
328
|
+
// LanguageFromCodeOrName
|
329
|
+
// ----------------------
|
330
|
+
//
|
331
|
+
// If lang_code_or_name is a language code or a language name,
|
332
|
+
// set *language to the corresponding Language and return true.
|
333
|
+
// Otherwise set *language to UNKNOWN_LANGUAGE and return false.
|
334
|
+
//
|
335
|
+
bool LanguageFromCodeOrName(const char* lang_code_or_name,
|
336
|
+
Language* language);
|
337
|
+
|
338
|
+
// LanguageNameFromCode
|
339
|
+
// --------------------
|
340
|
+
//
|
341
|
+
// If language_code is the code for a Language (see LanguageFromCode),
|
342
|
+
// return the standard name of that language (see LanguageName).
|
343
|
+
// Otherwise return invalid_language_name().
|
344
|
+
//
|
345
|
+
const char* LanguageNameFromCode(const char* language_code);
|
346
|
+
|
347
|
+
|
348
|
+
// Miscellany
|
349
|
+
|
350
|
+
// LanguageCodeToUnderscoreForm
|
351
|
+
// ----------------------------
|
352
|
+
//
|
353
|
+
// Given a language code, convert the dash "-" to underscore "_".
|
354
|
+
//
|
355
|
+
// Specifically, if result_length <= strlen(lang_code), set result[0]
|
356
|
+
// to '\0' and return false. Otherwise, copy lang_code to result,
|
357
|
+
// converting every dash to an underscore, converting every character
|
358
|
+
// before the first dash or underscore to lower case, and converting
|
359
|
+
// every character after the first dash or underscore to upper
|
360
|
+
// case. If there is no dash or underscore, convert the entire string
|
361
|
+
// to lower case.
|
362
|
+
//
|
363
|
+
// REQUIRES: 'lang_code' must not be NULL. 'result' must not be NULL.
|
364
|
+
|
365
|
+
bool LanguageCodeToUnderscoreForm(const char* lang_code,
|
366
|
+
char* result,
|
367
|
+
int result_length);
|
368
|
+
|
369
|
+
//
|
370
|
+
// AlwaysPutInExpectedRestrict
|
371
|
+
// ---------------------------
|
372
|
+
//
|
373
|
+
// For Web pages in certain top-level domains, Web Search always
|
374
|
+
// applies a "country restrict". If 'tld' matches one of those, using
|
375
|
+
// a case-SENSITIVE comparison, set *expected_language to the Language
|
376
|
+
// most commonly found in that top-level domain and return true.
|
377
|
+
// Otherwise, set *expected_language to UNKNOWN_LANGUAGE and return false.
|
378
|
+
bool AlwaysPutInExpectedRestrict(const char *tld, Language *expected_language);
|
379
|
+
|
380
|
+
|
381
|
+
#endif // UTIL_LANGUAGES_LANGUAGES_H_
|
@@ -0,0 +1,191 @@
|
|
1
|
+
// Copyright 2016 Google Inc.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
////////////////////////////////////////////////////////////////////////////////
|
16
|
+
|
17
|
+
#ifndef UTIL_LANGUAGES_LANGUAGES_PB_H_
|
18
|
+
#define UTIL_LANGUAGES_LANGUAGES_PB_H_
|
19
|
+
|
20
|
+
// Language identifiers. Every enumerator carries an explicit numeric value,
// running contiguously from ENGLISH (0) to MONTENEGRIN (160). NUM_LANGUAGES
// is the count of entries, not a language itself.
enum Language {
  ENGLISH = 0,
  DANISH = 1,
  DUTCH = 2,
  FINNISH = 3,
  FRENCH = 4,
  GERMAN = 5,
  HEBREW = 6,
  ITALIAN = 7,
  JAPANESE = 8,
  KOREAN = 9,
  NORWEGIAN = 10,
  POLISH = 11,
  PORTUGUESE = 12,
  RUSSIAN = 13,
  SPANISH = 14,
  SWEDISH = 15,
  CHINESE = 16,
  CZECH = 17,
  GREEK = 18,
  ICELANDIC = 19,
  LATVIAN = 20,
  LITHUANIAN = 21,
  ROMANIAN = 22,
  HUNGARIAN = 23,
  ESTONIAN = 24,
  TG_UNKNOWN_LANGUAGE = 25,
  UNKNOWN_LANGUAGE = 26,
  BULGARIAN = 27,
  CROATIAN = 28,
  SERBIAN = 29,
  IRISH = 30,              // UI only.
  GALICIAN = 31,
  TAGALOG = 32,            // Tagalog (tl) + Filipino (fil).
  TURKISH = 33,
  UKRAINIAN = 34,
  HINDI = 35,
  MACEDONIAN = 36,
  BENGALI = 37,
  INDONESIAN = 38,
  LATIN = 39,              // UI only.
  MALAY = 40,
  MALAYALAM = 41,
  WELSH = 42,              // UI only.
  NEPALI = 43,
  TELUGU = 44,
  ALBANIAN = 45,
  TAMIL = 46,
  BELARUSIAN = 47,
  JAVANESE = 48,           // UI only.
  OCCITAN = 49,            // UI only.
  URDU = 50,
  BIHARI = 51,
  GUJARATI = 52,
  THAI = 53,
  ARABIC = 54,
  CATALAN = 55,
  ESPERANTO = 56,
  BASQUE = 57,
  INTERLINGUA = 58,        // UI only.
  KANNADA = 59,
  PUNJABI = 60,
  SCOTS_GAELIC = 61,       // UI only.
  SWAHILI = 62,
  SLOVENIAN = 63,
  MARATHI = 64,
  MALTESE = 65,
  VIETNAMESE = 66,
  FRISIAN = 67,            // UI only.
  SLOVAK = 68,
  CHINESE_T = 69,          // This is added to solve the problem of
                           // distinguishing Traditional and Simplified
                           // Chinese when the encoding is UTF8.
  FAROESE = 70,            // UI only.
  SUNDANESE = 71,          // UI only.
  UZBEK = 72,
  AMHARIC = 73,
  AZERBAIJANI = 74,
  GEORGIAN = 75,
  TIGRINYA = 76,           // UI only.
  PERSIAN = 77,
  BOSNIAN = 78,            // UI only. LangId language: CROATIAN (28)
  SINHALESE = 79,
  NORWEGIAN_N = 80,        // UI only. LangId language: NORWEGIAN (10)
  PORTUGUESE_P = 81,       // UI only. LangId language: PORTUGUESE (12)
  PORTUGUESE_B = 82,       // UI only. LangId language: PORTUGUESE (12)
  XHOSA = 83,              // UI only.
  ZULU = 84,               // UI only.
  GUARANI = 85,
  SESOTHO = 86,            // UI only.
  TURKMEN = 87,            // UI only.
  KYRGYZ = 88,
  BRETON = 89,             // UI only.
  TWI = 90,                // UI only.
  YIDDISH = 91,            // UI only.
  SERBO_CROATIAN = 92,     // UI only. LangId language: SERBIAN (29)
  SOMALI = 93,             // UI only.
  UIGHUR = 94,
  KURDISH = 95,
  MONGOLIAN = 96,
  ARMENIAN = 97,
  LAOTHIAN = 98,
  SINDHI = 99,
  RHAETO_ROMANCE = 100,    // UI only.
  AFRIKAANS = 101,
  LUXEMBOURGISH = 102,     // UI only.
  BURMESE = 103,
  KHMER = 104,
  TIBETAN = 105,
  DHIVEHI = 106,           // sometimes spelled Divehi, lang of Maldives
  CHEROKEE = 107,
  SYRIAC = 108,            // UI only.
  LIMBU = 109,             // UI only.
  ORIYA = 110,
  ASSAMESE = 111,          // UI only.
  CORSICAN = 112,          // UI only.
  INTERLINGUE = 113,       // UI only.
  KAZAKH = 114,
  LINGALA = 115,           // UI only.
  MOLDAVIAN = 116,         // UI only. LangId language: ROMANIAN (22)
  PASHTO = 117,
  QUECHUA = 118,           // UI only.
  SHONA = 119,             // UI only.
  TAJIK = 120,
  TATAR = 121,             // UI only.
  TONGA = 122,             // UI only.
  YORUBA = 123,            // UI only.
  CREOLES_AND_PIDGINS_ENGLISH_BASED = 124,     // UI only.
  CREOLES_AND_PIDGINS_FRENCH_BASED = 125,      // UI only.
  CREOLES_AND_PIDGINS_PORTUGUESE_BASED = 126,  // UI only.
  CREOLES_AND_PIDGINS_OTHER = 127,             // UI only.
  MAORI = 128,             // UI only.
  WOLOF = 129,             // UI only.
  ABKHAZIAN = 130,         // UI only.
  AFAR = 131,              // UI only.
  AYMARA = 132,            // UI only.
  BASHKIR = 133,           // UI only.
  BISLAMA = 134,           // UI only.
  DZONGKHA = 135,          // UI only.
  FIJIAN = 136,            // UI only.
  GREENLANDIC = 137,       // UI only.
  HAUSA = 138,             // UI only.
  HAITIAN_CREOLE = 139,    // UI only.
  INUPIAK = 140,           // UI only.
  INUKTITUT = 141,
  KASHMIRI = 142,          // UI only.
  KINYARWANDA = 143,       // UI only.
  MALAGASY = 144,          // UI only.
  NAURU = 145,             // UI only.
  OROMO = 146,             // UI only.
  RUNDI = 147,             // UI only.
  SAMOAN = 148,            // UI only.
  SANGO = 149,             // UI only.
  SANSKRIT = 150,
  SISWANT = 151,           // UI only.
  TSONGA = 152,            // UI only.
  TSWANA = 153,            // UI only.
  VOLAPUK = 154,           // UI only.
  ZHUANG = 155,            // UI only.
  KHASI = 156,             // UI only.
  SCOTS = 157,             // UI only.
  GANDA = 158,             // UI only.
  MANX = 159,              // UI only.
  MONTENEGRIN = 160,       // UI only. LangId language: SERBIAN (29)
  NUM_LANGUAGES = 161,     // Always keep this at the end. It is not a
                           // valid Language enum. It is only used to
                           // indicate the total number of Languages.
  // NOTE: If you add a language, you will break a unittest. The note this
  // comment used to point to ("at the top of this enum") is not present in
  // this header.
};
|
190
|
+
|
191
|
+
#endif // UTIL_LANGUAGES_LANGUAGES_PB_H_
|
@@ -0,0 +1,25 @@
|
|
1
|
+
// Copyright 2016 Google Inc.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
////////////////////////////////////////////////////////////////////////////////
|
16
|
+
|
17
|
+
#ifndef UTIL_LOGGING_H_
|
18
|
+
#define UTIL_LOGGING_H_
|
19
|
+
|
20
|
+
// Stub versions of the CHECK / DCHECK assertion macros. Any earlier
// definition is discarded and both are redefined to expand to nothing, so
// the argument expression is never evaluated: side effects written inside
// CHECK(...) or DCHECK(...) do not happen.
#undef CHECK
#define CHECK(expr)
#undef DCHECK
#define DCHECK(expr)
|
24
|
+
|
25
|
+
#endif // UTIL_LOGGING_H_
|
@@ -0,0 +1,53 @@
|
|
1
|
+
// Copyright 2016 Google Inc.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
//
|
15
|
+
////////////////////////////////////////////////////////////////////////////////
|
16
|
+
|
17
|
+
#ifndef UTIL_PORT_H_
|
18
|
+
#define UTIL_PORT_H_
|
19
|
+
|
20
|
+
#include <stdarg.h>
|
21
|
+
|
22
|
+
// 64-bit literal suffix helpers: MSVC spells the suffixes I64 / UI64, every
// other compiler uses LL / ULL.
#if defined(_MSC_VER)
#define GG_LONGLONG(x) x##I64
#define GG_ULONGLONG(x) x##UI64
#else
#define GG_LONGLONG(x) x##LL
#define GG_ULONGLONG(x) x##ULL
#endif

// Per C99 7.18.4 (Macros for integer constants), a C++ translation unit has
// to define __STDC_CONSTANT_MACROS before including <stdint.h> to get the
// INTn_C and UINTn_C macros for integer constants. It's difficult to
// guarantee any specific ordering of header includes, so it's difficult to
// guarantee that the INTn_C macros can be defined by including <stdint.h> at
// any specific point. Provide GG_INTn_C macros instead.

// Signed constants: the 8/16/32-bit forms pass the literal through
// unchanged; the 64-bit form appends the platform's long long suffix.
#define GG_INT8_C(x)    (x)
#define GG_INT16_C(x)   (x)
#define GG_INT32_C(x)   (x)
#define GG_INT64_C(x)   GG_LONGLONG(x)

// Unsigned constants: the 8/16/32-bit forms append U; the 64-bit form
// appends the platform's unsigned long long suffix.
#define GG_UINT8_C(x)   (x ## U)
#define GG_UINT16_C(x)  (x ## U)
#define GG_UINT32_C(x)  (x ## U)
#define GG_UINT64_C(x)  GG_ULONGLONG(x)
|
45
|
+
|
46
|
+
// Define an OS-neutral wrapper for shared library entry points:
// API_CALL expands to __stdcall when _WIN32 is defined and to nothing
// otherwise.
#if defined(_WIN32)
#define API_CALL __stdcall
#else
#define API_CALL
#endif
|
52
|
+
|
53
|
+
#endif // UTIL_PORT_H_
|