compact_enc_det 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/ext/compact_enc_det/compact_enc_det/CMakeLists.txt +103 -0
  3. data/ext/compact_enc_det/compact_enc_det/LICENSE +202 -0
  4. data/ext/compact_enc_det/compact_enc_det/README.md +46 -0
  5. data/ext/compact_enc_det/compact_enc_det/autogen.sh +74 -0
  6. data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det.cc +5719 -0
  7. data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det.h +83 -0
  8. data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_fuzz_test.cc +54 -0
  9. data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_generated_tables.h +6326 -0
  10. data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_generated_tables2.h +856 -0
  11. data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_hint_code.cc +169 -0
  12. data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_hint_code.h +45 -0
  13. data/ext/compact_enc_det/compact_enc_det/compact_enc_det/compact_enc_det_unittest.cc +5260 -0
  14. data/ext/compact_enc_det/compact_enc_det/compact_enc_det/detail_head_string.inc +152 -0
  15. data/ext/compact_enc_det/compact_enc_det/util/basictypes.h +331 -0
  16. data/ext/compact_enc_det/compact_enc_det/util/case_insensitive_hash.h +88 -0
  17. data/ext/compact_enc_det/compact_enc_det/util/commandlineflags.h +39 -0
  18. data/ext/compact_enc_det/compact_enc_det/util/encodings/encodings.cc +891 -0
  19. data/ext/compact_enc_det/compact_enc_det/util/encodings/encodings.h +299 -0
  20. data/ext/compact_enc_det/compact_enc_det/util/encodings/encodings.pb.h +181 -0
  21. data/ext/compact_enc_det/compact_enc_det/util/encodings/encodings_unittest.cc +34 -0
  22. data/ext/compact_enc_det/compact_enc_det/util/languages/languages.cc +349 -0
  23. data/ext/compact_enc_det/compact_enc_det/util/languages/languages.h +381 -0
  24. data/ext/compact_enc_det/compact_enc_det/util/languages/languages.pb.h +191 -0
  25. data/ext/compact_enc_det/compact_enc_det/util/logging.h +25 -0
  26. data/ext/compact_enc_det/compact_enc_det/util/port.h +53 -0
  27. data/ext/compact_enc_det/compact_enc_det/util/string_util.h +61 -0
  28. data/ext/compact_enc_det/compact_enc_det/util/varsetter.h +66 -0
  29. data/ext/compact_enc_det/compact_enc_det.cc +100 -0
  30. data/ext/compact_enc_det/extconf.rb +20 -0
  31. data/lib/compact_enc_det/version.rb +3 -0
  32. data/lib/compact_enc_det.rb +2 -0
  33. metadata +106 -0
@@ -0,0 +1,381 @@
1
+ // Copyright 2016 Google Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ ////////////////////////////////////////////////////////////////////////////////
16
+
17
+ #ifndef UTIL_LANGUAGES_LANGUAGES_H_
18
+ #define UTIL_LANGUAGES_LANGUAGES_H_
19
+
20
+ // This interface defines the Language enum and functions that depend
21
+ // only on Language values.
22
+
23
+ // A hash-function for Language, hash<Language>, is defined in
24
+ // i18n/languages/public/languages-hash.h
25
+
26
+ #ifndef SWIG
27
+ // Language enum defined in languages.proto
28
+ // Also description on how to add languages.
29
+ #include "util/languages/languages.pb.h"
30
+
31
+ #else
32
+
33
+ // TODO: Include a header containing swig-compatible enum.
34
+
35
+ #endif
36
+
37
+ const int kNumLanguages = NUM_LANGUAGES;  // NUM_LANGUAGES is the last enumerator of Language: a count, not a valid language.
38
+
39
+ // Return the default language (ENGLISH).
40
+ Language default_language();
41
+
42
+
43
+ // *******************************************
44
+ // Language predicates
45
+ // IsValidLanguage()
46
+ // IS_LANGUAGE_UNKNOWN()
47
+ // IsCJKLanguage()
48
+ // IsChineseLanguage()
49
+ // IsNorwegianLanguage()
50
+ // IsPortugueseLanguage()
51
+ // IsRightToLeftLanguage()
52
+ // IsMaybeRightToLeftLanguage()
53
+ // IsSameLanguage()
54
+ // IsScriptRequiringLongerSnippets()
55
+ // *******************************************
56
+
57
+ // IsValidLanguage
58
+ // ===============
59
+ //
60
+ // Function to check if the input is within range of the Language enum. If
61
+ // IsValidLanguage(lang) returns true, it is safe to call
62
+ // static_cast<Language>(lang).
63
+ //
64
+ inline bool IsValidLanguage(int lang) {
65
+ return ((lang >= 0) && (lang < kNumLanguages));  // upper bound is exclusive: kNumLanguages itself is rejected
66
+ }
67
+
68
+ // Return true if the language is "unknown". (This function was
69
+ // previously a macro, hence the spelling in all caps.)
70
+ //
71
+ inline bool IS_LANGUAGE_UNKNOWN(Language lang) {
72
+ return lang == TG_UNKNOWN_LANGUAGE || lang == UNKNOWN_LANGUAGE;  // either of the two "unknown" enumerators counts
73
+ }
74
+
75
+ // IsCJKLanguage
76
+ // -------------
77
+ //
78
+ // This function returns true if the language is either Chinese
79
+ // (simplified or traditional), Japanese, or Korean.
80
+ bool IsCJKLanguage(Language lang);
81
+
82
+ // IsChineseLanguage
83
+ // -----------------
84
+ //
85
+ // This function returns true if the language is either Chinese
86
+ // (simplified or traditional)
87
+ bool IsChineseLanguage(Language lang);
88
+
89
+ // IsNorwegianLanguage
90
+ // --------------------
91
+ //
92
+ // This function returns true if the language is any of the Norwegian
93
+ // languages (regular or Nynorsk).
94
+ bool IsNorwegianLanguage(Language lang);
95
+
96
+ // IsPortugueseLanguage
97
+ // --------------------
98
+ //
99
+ // This function returns true if the language is any of the Portuguese
100
+ // languages (regular, Portugal or Brazil)
101
+ bool IsPortugueseLanguage(Language lang);
102
+
103
+ // IsSameLanguage
104
+ // --------------
105
+ //
106
+ // WARNING: This function provides only a simple test on the values of
107
+ // the two Language arguments. It returns false if either language is
108
+ // invalid. It returns true if the language arguments are equal, or
109
+ // if they are both Chinese languages, both Norwegian languages, or
110
+ // both Portuguese languages, as defined by IsChineseLanguage,
111
+ // IsNorwegianLanguage, and IsPortugueseLanguage. Otherwise it returns
112
+ // false.
113
+ bool IsSameLanguage(Language lang1, Language lang2);
114
+
115
+
116
+ // IsRightToLeftLanguage
117
+ // ---------------------
118
+ //
119
+ // This function returns true if the language is only written right-to-left
120
+ // (E.g., Hebrew, Arabic, Persian etc.)
121
+ //
122
+ // IMPORTANT NOTE: Technically we're talking about scripts, not languages.
123
+ // There are languages that can be written in more than one script.
124
+ // Examples:
125
+ // - Kurdish and Azeri ('AZERBAIJANI') can be written left-to-right in
126
+ // Latin or Cyrillic script, and right-to-left in Arabic script.
127
+ // - Sindhi and Punjabi are written in different scripts, depending on
128
+ // region and dialect.
129
+ // - Turkmen used an Arabic script historically, but not any more.
130
+ // - Pashto and Uyghur can use Arabic script, but use a Roman script
131
+ // on the Internet.
132
+ // - Kashmiri and Urdu are written either with Arabic or Devanagari script.
133
+ //
134
+ // This function only returns true for languages that are always, unequivocally
135
+ // written in right-to-left script.
136
+ //
137
+ // TODO: If we want to do anything special with multi-script languages
138
+ // we should create new 'languages' for each language+script, as we do for
139
+ // traditional vs. simplified Chinese. However most such languages are rare in
140
+ // use and even rarer on the web, so this is unlikely to be something we'll
141
+ // be concerned with for a while.
142
+ bool IsRightToLeftLanguage(Language lang);
143
+
144
+ // IsMaybeRightToLeftLanguage
145
+ // --------------------------
146
+ //
147
+ // This function returns true if the language may appear on the web in a
148
+ // right-to-left script (E.g., Hebrew, Arabic, Persian, Urdu, Kurdish, etc.)
149
+ //
150
+ // NOTE: See important notes under IsRightToLeftLanguage(...).
151
+ //
152
+ // This function returns true for languages that *may* appear on the web in a
153
+ // right-to-left script, even if they may also appear in a left-to-right
154
+ // script.
155
+ //
156
+ // This function should typically be used in cases where doing some work on
157
+ // left-to-right text would be OK (usually a no-op), and this function is used
158
+ // just to cut down on unnecessary work on regular, LTR text.
159
+ bool IsMaybeRightToLeftLanguage(Language lang);
160
+
161
+ // IsScriptRequiringLongerSnippets
162
+ // --------------------
163
+ //
164
+ // This function returns true if the script characteristics require longer
165
+ // snippet length (Devanagari, Bengali, Gurmukhi,
166
+ // Gujarati, Oriya, Tamil, Telugu, Kannada, Malayalam).
167
+ // COMMENTED OUT TO REDUCE DEPENDENCIES ON GOOGLE3 CODE
168
+ // bool IsScriptRequiringLongerSnippets(UnicodeScript script);
169
+
170
+
171
+ // *******************************************
172
+ // LANGUAGE NAMES
173
+ //
174
+ // This interface defines a standard name for each valid Language,
175
+ // and a standard name for invalid languages. Some language names use all
176
+ // uppercase letters, but others use mixed case.
177
+ // LanguageName() [Language to name]
178
+ // LanguageEnumName() [language to enum name]
179
+ // LanguageFromName() [name to Language]
180
+ // default_language_name()
181
+ // invalid_language_name()
182
+ // *******************************************
183
+
184
+ // Given a Language, returns its standard name.
185
+ // Return invalid_language_name() if the language is invalid.
186
+ const char* LanguageName(Language lang);
187
+
188
+ // Given a Language, return the name of the enum constant for that
189
+ // language. In all but a few cases, this is the same as its standard
190
+ // name. For example, LanguageName(CHINESE) returns "Chinese", but
191
+ // LanguageEnumName(CHINESE) returns "CHINESE". This is intended for
192
+ // code that is generating C++ code, where the enum constant is more
193
+ // useful than its integer value. Return "NUM_LANGUAGES" if
194
+ // the language is invalid.
195
+ const char* LanguageEnumName(Language lang);
196
+
197
+ // The maximum length of a standard language name.
198
+ const int kMaxLanguageNameSize = 50;
199
+
200
+ // The standard name for the default language.
201
+ const char* default_language_name();
202
+
203
+ // The standard name for all invalid languages.
204
+ const char* invalid_language_name();
205
+
206
+ // If lang_name matches the standard name of a Language, using a
207
+ // case-insensitive comparison, set *language to that Language and
208
+ // return true.
209
+ // Otherwise, set *language to UNKNOWN_LANGUAGE and return false.
210
+ //
211
+ // For backwards compatibility, "HATIAN_CREOLE" is allowed as a name
212
+ // for HAITIAN_CREOLE, and "QUECHAU" is allowed as a name for QUECHUA.
213
+ // For compatibility with LanguageEnumName, "UNKNOWN_LANGUAGE" is allowed
214
+ // as a name for UNKNOWN_LANGUAGE (the return value is true in this case,
215
+ // as it is for "Unknown"), and "CHINESE_T" is allowed as a name for
216
+ // CHINESE_T (i.e., a synonym for "ChineseT").
217
+ //
218
+ // REQUIRES: language must not be NULL.
219
+ //
220
+ bool LanguageFromName(const char* lang_name, Language *language);
221
+
222
+
223
+
224
+ // *******************************************
225
+ // LANGUAGE CODES
226
+ //
227
+ // This interface defines a standard code for each valid language, and
228
+ // a standard code for invalid languages. These are derived from ISO codes,
229
+ // with some Google additions.
230
+ // LanguageCode()
231
+ // default_language_code()
232
+ // invalid_language_code()
233
+ // LanguageCodeWithDialects()
234
+ // LanguageCodeISO639_1()
235
+ // LanguageCodeISO639_2()
236
+ // *******************************************
237
+
238
+ // Given a Language, return its standard code. There are Google-specific codes:
239
+ // For CHINESE_T, return "zh-TW".
240
+ // For TG_UNKNOWN_LANGUAGE, return "ut".
241
+ // For UNKNOWN_LANGUAGE, return "un".
242
+ // For PORTUGUESE_P, return "pt-PT".
243
+ // For PORTUGUESE_B, return "pt-BR".
244
+ // For LIMBU, return "sit-NP".
245
+ // For CHEROKEE, return "chr".
246
+ // For SYRIAC, return "syr".
247
+ // Otherwise return the ISO 639-1 two-letter language code for lang.
248
+ // If lang is invalid, return invalid_language_code().
249
+ //
250
+ // NOTE: See the note below about the codes for Chinese languages.
251
+ //
252
+ const char* LanguageCode(Language lang);
253
+
254
+ // The maximum length of a language code.
255
+ const int kMaxLanguageCodeSize = 50;
256
+
257
+ // The standard code for the default language.
258
+ const char* default_language_code();
259
+
260
+ // The standard code for all invalid languages.
261
+ const char* invalid_language_code();
262
+
263
+
264
+ // --------------------------------------------
265
+ // NOTE: CHINESE LANGUAGE CODES
266
+ //
267
+ // There are three functions that return codes for Chinese languages.
268
+ // LanguageCode(lang) and LanguageCodeWithDialects(lang) are defined here.
269
+ // LanguageCode(lang, encoding) is defined in i18n/encodings.lang_enc.h.
270
+ // The following list shows the different results.
271
+ //
272
+ // LanguageCode(CHINESE) returns "zh"
273
+ // LanguageCode(CHINESE_T) returns "zh-TW".
274
+ //
275
+ // LanguageCodeWithDialects(CHINESE) returns "zh-CN".
276
+ // LanguageCodeWithDialects(CHINESE_T) returns "zh-TW".
277
+ //
278
+ // LanguageCode(CHINESE_T, <any encoding>) returns "zh-TW".
279
+ // LanguageCode(CHINESE, CHINESE_BIG5) returns "zh-TW".
280
+ // LanguageCode(CHINESE, <any other encoding>) returns "zh-CN".
281
+ //
282
+ // --------------------------------------------
283
+
284
+ // LanguageCodeWithDialects
285
+ // ------------------------
286
+ //
287
+ // If lang is CHINESE, return "zh-CN". Otherwise return LanguageCode(lang).
288
+ const char* LanguageCodeWithDialects(Language lang);
289
+
290
+ // LanguageCodeISO639_1
291
+ // --------------------
292
+ //
293
+ // Return the ISO 639-1 two-letter language code for lang.
294
+ // Return invalid_language_code() if lang is invalid or does not have
295
+ // an ISO 639-1 two-letter language code.
296
+ const char* LanguageCodeISO639_1(Language lang);
297
+
298
+ // LanguageCodeISO639_2
299
+ // --------------------
300
+ //
301
+ // Return the ISO 639-2 three-letter language code for lang.
302
+ // Return invalid_language_code() if lang is invalid or does not have
303
+ // an ISO 639-2 three-letter language code.
304
+ const char* LanguageCodeISO639_2(Language lang);
305
+
306
+ // LanguageFromCode
307
+ // ----------------
308
+ //
309
+ // If lang_code matches the code for a Language, using a case-insensitive
310
+ // comparison, set *language to that Language and return true.
311
+ // Otherwise, set *language to UNKNOWN_LANGUAGE and return false.
312
+ //
313
+ // lang_code can be an ISO 639-1 (two-letter) code, an ISO 639-2
314
+ // (three-letter) code, or a Google-specific code (see LanguageCode).
315
+ //
316
+ // Certain language-code aliases are also allowed:
317
+ // For "zh-cn" and "zh_cn", set *language to CHINESE.
318
+ // For "zh-tw" and "zh_tw", set *language to CHINESE_T.
319
+ // For "he", set *language to HEBREW.
320
+ // For "in", set *language to INDONESIAN.
321
+ // For "ji", set *language to YIDDISH.
322
+ // For "fil", set *language to TAGALOG.
323
+ //
324
+ // REQUIRES: 'language' must not be NULL.
325
+ bool LanguageFromCode(const char* lang_code, Language *language);
326
+
327
+
328
+ // LanguageFromCodeOrName
329
+ // ----------------------
330
+ //
331
+ // If lang_code_or_name is a language code or a language name,
332
+ // set *language to the corresponding Language and return true.
333
+ // Otherwise set *language to UNKNOWN_LANGUAGE and return false.
334
+ //
335
+ bool LanguageFromCodeOrName(const char* lang_code_or_name,
336
+ Language* language);
337
+
338
+ // LanguageNameFromCode
339
+ // --------------------
340
+ //
341
+ // If language_code is the code for a Language (see LanguageFromCode),
342
+ // return the standard name of that language (see LanguageName).
343
+ // Otherwise return invalid_language_name().
344
+ //
345
+ const char* LanguageNameFromCode(const char* language_code);
346
+
347
+
348
+ // Miscellany
349
+
350
+ // LanguageCodeToUnderscoreForm
351
+ // ----------------------------
352
+ //
353
+ // Given a language code, convert the dash "-" to underscore "_".
354
+ //
355
+ // Specifically, if result_length <= strlen(lang_code), set result[0]
356
+ // to '\0' and return false. Otherwise, copy lang_code to result,
357
+ // converting every dash to an underscore, converting every character
358
+ // before the first dash or underscore to lower case, and converting
359
+ // every character after the first dash or underscore to upper
360
+ // case. If there is no dash or underscore, convert the entire string
361
+ // to lower case.
362
+ //
363
+ // REQUIRES: 'lang_code' must not be NULL. 'result' must not be NULL.
364
+
365
+ bool LanguageCodeToUnderscoreForm(const char* lang_code,
366
+ char* result,
367
+ int result_length);
368
+
369
+ //
370
+ // AlwaysPutInExpectedRestrict
371
+ // ---------------------------
372
+ //
373
+ // For Web pages in certain top-level domains, Web Search always
374
+ // applies a "country restrict". If 'tld' matches one of those, using
375
+ // a case-SENSITIVE comparison, set *expected_language to the Language
376
+ // most commonly found in that top-level domain and return true.
377
+ // Otherwise, set *expected_language to UNKNOWN_LANGUAGE and return false.
378
+ bool AlwaysPutInExpectedRestrict(const char *tld, Language *expected_language);
379
+
380
+
381
+ #endif // UTIL_LANGUAGES_LANGUAGES_H_
@@ -0,0 +1,191 @@
1
+ // Copyright 2016 Google Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ ////////////////////////////////////////////////////////////////////////////////
16
+
17
+ #ifndef UTIL_LANGUAGES_LANGUAGES_PB_H_
18
+ #define UTIL_LANGUAGES_LANGUAGES_PB_H_
19
+
20
+ enum Language {  // Every enumerator below is given an explicit numeric value.
21
+ ENGLISH = 0,
22
+ DANISH = 1,
23
+ DUTCH = 2,
24
+ FINNISH = 3,
25
+ FRENCH = 4,
26
+ GERMAN = 5,
27
+ HEBREW = 6,
28
+ ITALIAN = 7,
29
+ JAPANESE = 8,
30
+ KOREAN = 9,
31
+ NORWEGIAN = 10,
32
+ POLISH = 11,
33
+ PORTUGUESE = 12,
34
+ RUSSIAN = 13,
35
+ SPANISH = 14,
36
+ SWEDISH = 15,
37
+ CHINESE = 16,
38
+ CZECH = 17,
39
+ GREEK = 18,
40
+ ICELANDIC = 19,
41
+ LATVIAN = 20,
42
+ LITHUANIAN = 21,
43
+ ROMANIAN = 22,
44
+ HUNGARIAN = 23,
45
+ ESTONIAN = 24,
46
+ TG_UNKNOWN_LANGUAGE = 25,
47
+ UNKNOWN_LANGUAGE = 26,
48
+ BULGARIAN = 27,
49
+ CROATIAN = 28,
50
+ SERBIAN = 29,
51
+ IRISH = 30, // UI only.
52
+ GALICIAN = 31,
53
+ TAGALOG = 32, // Tagalog (tl) + Filipino (fil),
54
+ TURKISH = 33,
55
+ UKRAINIAN = 34,
56
+ HINDI = 35,
57
+ MACEDONIAN = 36,
58
+ BENGALI = 37,
59
+ INDONESIAN = 38,
60
+ LATIN = 39, // UI only.
61
+ MALAY = 40,
62
+ MALAYALAM = 41,
63
+ WELSH = 42, // UI only.
64
+ NEPALI = 43,
65
+ TELUGU = 44,
66
+ ALBANIAN = 45,
67
+ TAMIL = 46,
68
+ BELARUSIAN = 47,
69
+ JAVANESE = 48, // UI only.
70
+ OCCITAN = 49, // UI only.
71
+ URDU = 50,
72
+ BIHARI = 51,
73
+ GUJARATI = 52,
74
+ THAI = 53,
75
+ ARABIC = 54,
76
+ CATALAN = 55,
77
+ ESPERANTO = 56,
78
+ BASQUE = 57,
79
+ INTERLINGUA = 58, // UI only.
80
+ KANNADA = 59,
81
+ PUNJABI = 60,
82
+ SCOTS_GAELIC = 61, // UI only.
83
+ SWAHILI = 62,
84
+ SLOVENIAN = 63,
85
+ MARATHI = 64,
86
+ MALTESE = 65,
87
+ VIETNAMESE = 66,
88
+ FRISIAN = 67, // UI only.
89
+ SLOVAK = 68,
90
+ CHINESE_T = 69, // This is added to solve the problem of
91
+ // distinguishing Traditional and Simplified
92
+ // Chinese when the encoding is UTF8.
93
+ FAROESE = 70, // UI only.
94
+ SUNDANESE = 71, // UI only.
95
+ UZBEK = 72,
96
+ AMHARIC = 73,
97
+ AZERBAIJANI = 74,
98
+ GEORGIAN = 75,
99
+ TIGRINYA = 76, // UI only.
100
+ PERSIAN = 77,
101
+ BOSNIAN = 78, // UI only. LangId language: CROATIAN (28)
102
+ SINHALESE = 79,
103
+ NORWEGIAN_N = 80, // UI only. LangId language: NORWEGIAN (10)
104
+ PORTUGUESE_P = 81, // UI only. LangId language: PORTUGUESE (12)
105
+ PORTUGUESE_B = 82, // UI only. LangId language: PORTUGUESE (12)
106
+ XHOSA = 83, // UI only.
107
+ ZULU = 84, // UI only.
108
+ GUARANI = 85,
109
+ SESOTHO = 86, // UI only.
110
+ TURKMEN = 87, // UI only.
111
+ KYRGYZ = 88,
112
+ BRETON = 89, // UI only.
113
+ TWI = 90, // UI only.
114
+ YIDDISH = 91, // UI only.
115
+ SERBO_CROATIAN= 92, // UI only. LangId language: SERBIAN (29)
116
+ SOMALI = 93, // UI only.
117
+ UIGHUR = 94,
118
+ KURDISH = 95,
119
+ MONGOLIAN = 96,
120
+ ARMENIAN = 97,
121
+ LAOTHIAN = 98,
122
+ SINDHI = 99,
123
+ RHAETO_ROMANCE= 100, // UI only.
124
+ AFRIKAANS = 101,
125
+ LUXEMBOURGISH = 102, // UI only.
126
+ BURMESE = 103,
127
+ KHMER = 104,
128
+ TIBETAN = 105,
129
+ DHIVEHI = 106, // sometimes spelled Divehi, lang of Maldives
130
+ CHEROKEE = 107,
131
+ SYRIAC = 108, // UI only.
132
+ LIMBU = 109, // UI only.
133
+ ORIYA = 110,
134
+ ASSAMESE = 111, // UI only.
135
+ CORSICAN = 112, // UI only.
136
+ INTERLINGUE = 113, // UI only.
137
+ KAZAKH = 114,
138
+ LINGALA = 115, // UI only.
139
+ MOLDAVIAN = 116, // UI only. LangId language: ROMANIAN (22)
140
+ PASHTO = 117,
141
+ QUECHUA = 118, // UI only.
142
+ SHONA = 119, // UI only.
143
+ TAJIK = 120,
144
+ TATAR = 121, // UI only.
145
+ TONGA = 122, // UI only.
146
+ YORUBA = 123, // UI only.
147
+ CREOLES_AND_PIDGINS_ENGLISH_BASED = 124, // UI only.
148
+ CREOLES_AND_PIDGINS_FRENCH_BASED = 125, // UI only.
149
+ CREOLES_AND_PIDGINS_PORTUGUESE_BASED = 126, // UI only.
150
+ CREOLES_AND_PIDGINS_OTHER = 127, // UI only.
151
+ MAORI = 128, // UI only.
152
+ WOLOF = 129, // UI only.
153
+ ABKHAZIAN = 130, // UI only.
154
+ AFAR = 131, // UI only.
155
+ AYMARA = 132, // UI only.
156
+ BASHKIR = 133, // UI only.
157
+ BISLAMA = 134, // UI only.
158
+ DZONGKHA = 135, // UI only.
159
+ FIJIAN = 136, // UI only.
160
+ GREENLANDIC = 137, // UI only.
161
+ HAUSA = 138, // UI only.
162
+ HAITIAN_CREOLE= 139, // UI only.
163
+ INUPIAK = 140, // UI only.
164
+ INUKTITUT = 141,
165
+ KASHMIRI = 142, // UI only.
166
+ KINYARWANDA = 143, // UI only.
167
+ MALAGASY = 144, // UI only.
168
+ NAURU = 145, // UI only.
169
+ OROMO = 146, // UI only.
170
+ RUNDI = 147, // UI only.
171
+ SAMOAN = 148, // UI only.
172
+ SANGO = 149, // UI only.
173
+ SANSKRIT = 150,
174
+ SISWANT = 151, // UI only.
175
+ TSONGA = 152, // UI only.
176
+ TSWANA = 153, // UI only.
177
+ VOLAPUK = 154, // UI only.
178
+ ZHUANG = 155, // UI only.
179
+ KHASI = 156, // UI only.
180
+ SCOTS = 157, // UI only.
181
+ GANDA = 158, // UI only.
182
+ MANX = 159, // UI only.
183
+ MONTENEGRIN = 160, // UI only. LangId language: SERBIAN (29)
184
+ NUM_LANGUAGES = 161, // Always keep this at the end. It is not a
185
+ // valid Language enum. It is only used to
186
+ // indicate the total number of Languages.
187
+ // NOTE: If you add a language, you will break a unittest. See the note
188
+ // at the top of this enum. NOTE(review): no such note appears in this header; presumably it lives in languages.proto - confirm.
189
+ };
190
+
191
+ #endif // UTIL_LANGUAGES_LANGUAGES_PB_H_
@@ -0,0 +1,25 @@
1
+ // Copyright 2016 Google Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ ////////////////////////////////////////////////////////////////////////////////
16
+
17
+ #ifndef UTIL_LOGGING_H_
18
+ #define UTIL_LOGGING_H_
19
+
20
+ #undef CHECK  // drop any earlier definition before redefining
21
+ #define CHECK(expr)  // empty replacement list: the check and its argument expression are discarded
22
+ #undef DCHECK  // drop any earlier definition before redefining
23
+ #define DCHECK(expr)  // empty replacement list, same as CHECK above
24
+
25
+ #endif // UTIL_LOGGING_H_
@@ -0,0 +1,53 @@
1
+ // Copyright 2016 Google Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ ////////////////////////////////////////////////////////////////////////////////
16
+
17
+ #ifndef UTIL_PORT_H_
18
+ #define UTIL_PORT_H_
19
+
20
+ #include <stdarg.h>
21
+
22
+ #if defined(_MSC_VER)  // _MSC_VER branch: paste the I64 / UI64 suffix onto the literal x
23
+ #define GG_LONGLONG(x) x##I64
24
+ #define GG_ULONGLONG(x) x##UI64
25
+ #else  // otherwise: paste the LL / ULL suffix onto the literal x
26
+ #define GG_LONGLONG(x) x##LL
27
+ #define GG_ULONGLONG(x) x##ULL
28
+ #endif  // defined(_MSC_VER)
29
+
30
+ // Per C99 7.18.4, define __STDC_CONSTANT_MACROS before including <stdint.h>
31
+ // to get the INTn_C and UINTn_C macros for integer constants. It's difficult
32
+ // to guarantee any specific ordering of header includes, so it's difficult to
33
+ // guarantee that the INTn_C macros can be defined by including <stdint.h> at
34
+ // any specific point. Provide GG_INTn_C macros instead.
35
+
36
+ #define GG_INT8_C(x) (x)  // signed 8/16/32-bit forms add no suffix, only parentheses
37
+ #define GG_INT16_C(x) (x)
38
+ #define GG_INT32_C(x) (x)
39
+ #define GG_INT64_C(x) GG_LONGLONG(x)  // 64-bit form delegates to GG_LONGLONG for the suffix
40
+
41
+ #define GG_UINT8_C(x) (x ## U)  // unsigned 8/16/32-bit forms paste a U suffix onto x
41
+ #define GG_UINT16_C(x) (x ## U)
42
+ #define GG_UINT32_C(x) (x ## U)
43
+ #define GG_UINT64_C(x) GG_ULONGLONG(x)  // 64-bit form delegates to GG_ULONGLONG for the suffix
45
+
46
+ // Define an OS-neutral wrapper for shared library entry points
47
+ #if defined(_WIN32)  // when _WIN32 is defined, API_CALL expands to __stdcall
47
+ #define API_CALL __stdcall
48
+ #else  // otherwise API_CALL expands to nothing
49
+ #define API_CALL
50
+ #endif  // defined(_WIN32)
52
+
53
+ #endif // UTIL_PORT_H_