language_detection 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. data/.gitignore +19 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +85 -0
  5. data/Rakefile +11 -0
  6. data/ext/cld/Makefile +34 -0
  7. data/ext/cld/base/basictypes.h +348 -0
  8. data/ext/cld/base/build_config.h +124 -0
  9. data/ext/cld/base/casts.h +156 -0
  10. data/ext/cld/base/commandlineflags.h +443 -0
  11. data/ext/cld/base/crash.h +41 -0
  12. data/ext/cld/base/dynamic_annotations.h +358 -0
  13. data/ext/cld/base/global_strip_options.h +59 -0
  14. data/ext/cld/base/log_severity.h +46 -0
  15. data/ext/cld/base/logging.h +1403 -0
  16. data/ext/cld/base/macros.h +243 -0
  17. data/ext/cld/base/port.h +54 -0
  18. data/ext/cld/base/scoped_ptr.h +428 -0
  19. data/ext/cld/base/stl_decl.h +0 -0
  20. data/ext/cld/base/stl_decl_msvc.h +107 -0
  21. data/ext/cld/base/string_util.h +29 -0
  22. data/ext/cld/base/strtoint.h +93 -0
  23. data/ext/cld/base/template_util.h +96 -0
  24. data/ext/cld/base/type_traits.h +198 -0
  25. data/ext/cld/base/vlog_is_on.h +143 -0
  26. data/ext/cld/cld.so +0 -0
  27. data/ext/cld/encodings/compact_lang_det/cldutil.cc +905 -0
  28. data/ext/cld/encodings/compact_lang_det/cldutil.h +1205 -0
  29. data/ext/cld/encodings/compact_lang_det/cldutil_dbg.h +76 -0
  30. data/ext/cld/encodings/compact_lang_det/cldutil_dbg_empty.cc +76 -0
  31. data/ext/cld/encodings/compact_lang_det/compact_lang_det.cc +62 -0
  32. data/ext/cld/encodings/compact_lang_det/compact_lang_det.h +145 -0
  33. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.cc +2574 -0
  34. data/ext/cld/encodings/compact_lang_det/compact_lang_det_impl.h +173 -0
  35. data/ext/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc +406 -0
  36. data/ext/cld/encodings/compact_lang_det/compile.cmd +1 -0
  37. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.cc +545 -0
  38. data/ext/cld/encodings/compact_lang_det/ext_lang_enc.h +119 -0
  39. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +380 -0
  40. data/ext/cld/encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +382 -0
  41. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +49 -0
  42. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +7119 -0
  43. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +61 -0
  44. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +1263 -0
  45. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +53 -0
  46. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +10 -0
  47. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +50 -0
  48. data/ext/cld/encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +70935 -0
  49. data/ext/cld/encodings/compact_lang_det/getonescriptspan.cc +570 -0
  50. data/ext/cld/encodings/compact_lang_det/getonescriptspan.h +131 -0
  51. data/ext/cld/encodings/compact_lang_det/letterscript_enum.cc +117 -0
  52. data/ext/cld/encodings/compact_lang_det/letterscript_enum.h +99 -0
  53. data/ext/cld/encodings/compact_lang_det/subsetsequence.cc +259 -0
  54. data/ext/cld/encodings/compact_lang_det/subsetsequence.h +44 -0
  55. data/ext/cld/encodings/compact_lang_det/subsetsequence_unittest.cc +99 -0
  56. data/ext/cld/encodings/compact_lang_det/tote.cc +299 -0
  57. data/ext/cld/encodings/compact_lang_det/tote.h +89 -0
  58. data/ext/cld/encodings/compact_lang_det/unittest_data.h +193 -0
  59. data/ext/cld/encodings/compact_lang_det/utf8propjustletter.h +1162 -0
  60. data/ext/cld/encodings/compact_lang_det/utf8propletterscriptnum.h +1222 -0
  61. data/ext/cld/encodings/compact_lang_det/utf8scannotjustletterspecial.h +1185 -0
  62. data/ext/cld/encodings/compact_lang_det/win/cld_basictypes.h +10 -0
  63. data/ext/cld/encodings/compact_lang_det/win/cld_commandlineflags.h +28 -0
  64. data/ext/cld/encodings/compact_lang_det/win/cld_google.h +18 -0
  65. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils.h +13 -0
  66. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_google3.cc +32 -0
  67. data/ext/cld/encodings/compact_lang_det/win/cld_htmlutils_windows.cc +29 -0
  68. data/ext/cld/encodings/compact_lang_det/win/cld_logging.h +21 -0
  69. data/ext/cld/encodings/compact_lang_det/win/cld_macros.h +19 -0
  70. data/ext/cld/encodings/compact_lang_det/win/cld_strtoint.h +26 -0
  71. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.cc +84 -0
  72. data/ext/cld/encodings/compact_lang_det/win/cld_unicodetext.h +40 -0
  73. data/ext/cld/encodings/compact_lang_det/win/cld_unilib.h +15 -0
  74. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_google3.cc +18 -0
  75. data/ext/cld/encodings/compact_lang_det/win/cld_unilib_windows.cc +29 -0
  76. data/ext/cld/encodings/compact_lang_det/win/cld_utf.h +24 -0
  77. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.cc +224 -0
  78. data/ext/cld/encodings/compact_lang_det/win/cld_utf8statetable.h +141 -0
  79. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils.h +22 -0
  80. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_google3.cc +18 -0
  81. data/ext/cld/encodings/compact_lang_det/win/cld_utf8utils_windows.cc +17 -0
  82. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.cc +172 -0
  83. data/ext/cld/encodings/compact_lang_det/win/normalizedunicodetext.h +67 -0
  84. data/ext/cld/encodings/internal/encodings.cc +12 -0
  85. data/ext/cld/encodings/lang_enc.h +254 -0
  86. data/ext/cld/encodings/proto/encodings.pb.h +169 -0
  87. data/ext/cld/encodings/public/encodings.h +301 -0
  88. data/ext/cld/extconf.rb +1 -0
  89. data/ext/cld/language_detection.cc +88 -0
  90. data/ext/cld/languages/internal/languages.cc +337 -0
  91. data/ext/cld/languages/proto/languages.pb.h +179 -0
  92. data/ext/cld/languages/public/languages.h +379 -0
  93. data/language_detection.gemspec +28 -0
  94. data/lib/language_detection/string.rb +1 -0
  95. data/lib/language_detection/version.rb +3 -0
  96. data/lib/language_detection.rb +54 -0
  97. data/test/_helper.rb +15 -0
  98. data/test/fixtures/languages.csv +80 -0
  99. data/test/language_detection_test.rb +88 -0
  100. metadata +250 -0
@@ -0,0 +1,76 @@
1
+ // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ #ifndef ENCODINGS_COMPACT_LANG_DET_CLDUTIL_DBG_H_
6
+ #define ENCODINGS_COMPACT_LANG_DET_CLDUTIL_DBG_H_
7
+
8
+ #include "encodings/compact_lang_det/cldutil.h"
9
+ #include <string>
10
+ #include "encodings/compact_lang_det/ext_lang_enc.h"
11
+ #include "encodings/compact_lang_det/tote.h"
12
+ #include "encodings/compact_lang_det/win/cld_basictypes.h"
13
+ #include "encodings/compact_lang_det/win/cld_commandlineflags.h"
14
+
15
+ DECLARE_bool(dbgscore);
16
+ DECLARE_bool(dbglookup);
17
+ DECLARE_bool(dbgreli);
18
+
19
+ using std::string;
20
+
21
+ namespace cld {
22
+
23
+
24
+ //------------------------------------------------------------------------------
25
+ // Debugging. Not thread safe
26
+ //------------------------------------------------------------------------------
27
+
28
+ void DbgScoreInit(const char* src, int len);
29
+
30
+ // Return a 3-byte + NUL code for language
31
+ void DbgLangName3(Language lang, char* temp);
32
+
33
+ // Show all per-language totals
34
+ void DbgScoreState();
35
+
36
+ void DbgScoreTop(const char* src, int srclen, Tote* chunk_tote);
37
+
38
+ void DbgScoreFlush();
39
+
40
+ // Allow additional scoring debug output
41
+ void DbgScoreRecord(const char* src, uint32 probs, int len);
42
+
43
+ void DbgScoreRecordUni(const char* src, int propval, int len);
44
+
45
+ // Debug print language name(s)
46
+ void PrintLang(FILE* f, const Tote* chunk_tote,
47
+ const Language cur_lang, const bool cur_unreliable,
48
+ Language prior_lang, bool prior_unreliable);
49
+
50
+ // Debug print language name(s)
51
+ void PrintLang2(FILE* f,
52
+ const Language lang1, const Language lang2, bool diff_prior);
53
+
54
+ // Debug print text span
55
+ void PrintText(FILE* f, Language cur_lang, const string& str);
56
+
57
+ // Debug print text span with speculative language
58
+ void PrintTextSpeculative(FILE* f, Language cur_lang, const string& str);
59
+
60
+ // Debug print ignored text span
61
+ void PrintSkippedText(FILE* f, const string& str);
62
+
63
+ void DbgProbsToStderr(uint32 probs);
64
+ void DbgUniTermToStderr(int propval, const uint8* usrc, int len);
65
+ // No pre/post space
66
+ void DbgBiTermToStderr(uint32 bihash, uint32 probs,
67
+ const char* src, int len);
68
+ void DbgQuadTermToStderr(uint32 quadhash, uint32 probs,
69
+ const char* src, int len);
70
+ void DbgWordTermToStderr(uint64 wordhash, uint32 probs,
71
+ const char* src, int len);
72
+
73
+ } // End namespace cld
74
+
75
+
76
+ #endif // ENCODINGS_COMPACT_LANG_DET_CLDUTIL_DBG_H_
@@ -0,0 +1,76 @@
1
+ // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ #include "encodings/compact_lang_det/cldutil_dbg.h"
6
+ //#include <string>
7
+
8
+ //#include "base/logging.h"
9
+ //#include "i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h"
10
+ //#include "i18n/encodings/compact_lang_det/utf8propletterscriptnum.h"
11
+ //#include "third_party/utf/utf.h" // for UTFmax
12
+ //#include "util/utf8/unicodeprops.h"
13
+ //#include "util/utf8/unilib.h"
14
+ //#include "util/utf8/utf8statetable.h"
15
+ #include "encodings/compact_lang_det/win/cld_commandlineflags.h"
16
+
17
+ DEFINE_bool(dbgscore, false, "Print picture of score calculation");
18
+ DEFINE_bool(dbglookup, false, "Print every quad/uni lookup in score calc");
19
+ DEFINE_bool(dbgreli, false, "Print reliability in score calc");
20
+
21
+ namespace cld {
22
+
23
+
24
+ //------------------------------------------------------------------------------
25
+ // Debugging. Not thread safe
26
+ // This is the empty version -- routines return immediately
27
+ //------------------------------------------------------------------------------
28
+
29
+ void DbgScoreInit(const char* src, int len) {};
30
+
31
+ // Return a 3-byte + NUL code for language
32
+ void DbgLangName3(Language lang, char* temp) {};
33
+
34
+ // Show all per-language totals
35
+ void DbgScoreState() {};
36
+
37
+ void DbgScoreTop(const char* src, int srclen, Tote* chunk_tote) {};
38
+
39
+ void DbgScoreFlush() {};
40
+
41
+ // Allow additional scoring debug output
42
+ void DbgScoreRecord(const char* src, uint32 probs, int len) {};
43
+
44
+ void DbgScoreRecordUni(const char* src, int propval, int len) {};
45
+
46
+ // Debug print language name(s)
47
+ void PrintLang(FILE* f, const Tote* chunk_tote,
48
+ const Language cur_lang, const bool cur_unreliable,
49
+ Language prior_lang, bool prior_unreliable) {};
50
+
51
+ // Debug print language name(s)
52
+ void PrintLang2(FILE* f,
53
+ const Language lang1, const Language lang2, bool diff_prior) {};
54
+
55
+ // Debug print text span
56
+ void PrintText(FILE* f, Language cur_lang, const string& str) {};
57
+
58
+ // Debug print text span with speculative language
59
+ void PrintTextSpeculative(FILE* f, Language cur_lang, const string& str) {};
60
+
61
+ // Debug print ignored text span
62
+ void PrintSkippedText(FILE* f, const string& str) {};
63
+
64
+ void DbgProbsToStderr(uint32 probs) {};
65
+ void DbgUniTermToStderr(int propval, const uint8* usrc, int len) {};
66
+ // No pre/post space
67
+ void DbgBiTermToStderr(uint32 bihash, uint32 probs,
68
+ const char* src, int len) {};
69
+ void DbgQuadTermToStderr(uint32 quadhash, uint32 probs,
70
+ const char* src, int len) {};
71
+ void DbgWordTermToStderr(uint64 wordhash, uint32 probs,
72
+ const char* src, int len) {};
73
+
74
+
75
+ } // End namespace cld
76
+
@@ -0,0 +1,62 @@
1
+ // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ #include "encodings/compact_lang_det/compact_lang_det.h"
6
+ #include "encodings/compact_lang_det/compact_lang_det_impl.h"
7
+ #include "encodings/compact_lang_det/win/cld_basictypes.h"
8
+
9
+ // String is "code_version - data_scrape_date"
10
+ static const char* kDetectLanguageVersion = "V1.6 - 20081121";
11
+
12
+ // Large-table version for all ~160 languages (all Tiers)
13
+
14
+ Language CompactLangDet::DetectLanguage(
15
+ const DetectionTables* tables,
16
+ const char* buffer,
17
+ int buffer_length,
18
+ bool is_plain_text,
19
+ bool do_allow_extended_languages,
20
+ bool do_pick_summary_language,
21
+ bool do_remove_weak_matches,
22
+ const char* tld_hint, // "id" boosts Indonesian
23
+ int encoding_hint, // SJS boosts Japanese
24
+ Language language_hint, // ITALIAN boosts it
25
+ Language* language3,
26
+ int* percent3,
27
+ double* normalized_score3,
28
+ int* text_bytes,
29
+ bool* is_reliable) {
30
+ int flags = 0;
31
+ Language plus_one = UNKNOWN_LANGUAGE;
32
+
33
+ Language lang = CompactLangDetImpl::DetectLanguageSummaryV25(
34
+ tables,
35
+ buffer,
36
+ buffer_length,
37
+ is_plain_text,
38
+ do_pick_summary_language,
39
+ do_remove_weak_matches,
40
+ tld_hint, // "id" boosts Indonesian
41
+ encoding_hint, // SJS boosts Japanese
42
+ language_hint, // ITALIAN boosts it
43
+ do_allow_extended_languages,
44
+ flags,
45
+ plus_one,
46
+ language3,
47
+ percent3,
48
+ normalized_score3,
49
+ text_bytes,
50
+ is_reliable);
51
+ // Do not default to English
52
+ return lang;
53
+ }
54
+
55
+
56
+
57
+ // Return version text string
58
+ // String is "code_version - data_scrape_date"
59
+ const char* CompactLangDet::DetectLanguageVersion() {
60
+ return kDetectLanguageVersion;
61
+ }
62
+
@@ -0,0 +1,145 @@
1
+ // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file.
4
+
5
+ // Baybayin (ancient script of the Philippines) is detected as TAGALOG.
6
+ // Chu Nom (Vietnamese ancient Han characters) is detected as VIETNAMESE.
7
+ // HAITIAN_CREOLE is detected as such.
8
+ // NORWEGIAN and NORWEGIAN_N are detected separately (but not robustly)
9
+ // PORTUGUESE, PORTUGUESE_P, and PORTUGUESE_B are all detected as PORTUGUESE.
10
+ // ROMANIAN-Latin is detected as ROMANIAN; ROMANIAN-Cyrillic as MOLDAVIAN.
11
+ // SERBO_CROATIAN, BOSNIAN, CROATIAN, SERBIAN, MONTENEGRIN in the Latin script
12
+ // are all detected as CROATIAN; in the Cyrillic script as SERBIAN.
13
+ // Zhuang is detected in the Latin script only.
14
+ //
15
+ // The Google interface languages X_PIG_LATIN and X_KLINGON are detected in the
16
+ // extended calls ExtDetectLanguageSummary(). BorkBorkBork, ElmerFudd, and
17
+ // Hacker are not detected (too little training data).
18
+ //
19
+ // UNKNOWN_LANGUAGE is returned if no language's internal reliability measure
20
+ // is high enough. This happens with non-text input such as the bytes of a
21
+ // JPEG, and also with some text in languages outside the Google Language
22
+ // enum, such as Ilonggo.
23
+ //
24
+ // The following languages are detected in multiple scripts:
25
+ // AZERBAIJANI (Latin, Cyrillic*, Arabic*)
26
+ // BURMESE (Latin, Myanmar)
27
+ // HAUSA (Latin, Arabic)
28
+ // KASHMIRI (Arabic, Devanagari)
29
+ // KAZAKH (Latin, Cyrillic, Arabic)
30
+ // KURDISH (Latin*, Arabic)
31
+ // KYRGYZ (Cyrillic, Arabic)
32
+ // LIMBU (Devanagari, Limbu)
33
+ // MONGOLIAN (Cyrillic, Mongolian)
34
+ // SANSKRIT (Latin, Devanagari)
35
+ // SINDHI (Arabic, Devanagari)
36
+ // TAGALOG (Latin, Tagalog)
37
+ // TAJIK (Cyrillic, Arabic*)
38
+ // TATAR (Latin, Cyrillic, Arabic)
39
+ // TURKMEN (Latin, Cyrillic, Arabic)
40
+ // UIGHUR (Latin, Cyrillic, Arabic)
41
+ // UZBEK (Latin, Cyrillic, Arabic)
42
+ //
43
+ // * Due to a shortage of training text, AZERBAIJANI is not currently detected
44
+ // in Arabic or Cyrillic scripts, nor KURDISH in Latin script, nor TAJIK in
45
+ // Arabic script.
46
+ //
47
+
48
+ #ifndef ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_H_
49
+ #define ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_H_
50
+
51
+ #include "languages/public/languages.h"
52
+ #include "encodings/compact_lang_det/win/cld_utf8statetable.h"
53
+
54
+ namespace cld {
55
+ struct CLDTableSummary;
56
+ } // namespace cld
57
+
58
+ namespace CompactLangDet {
59
+ // Scan interchange-valid UTF-8 bytes and detect most likely language,
60
+ // or set of languages.
61
+ //
62
+ // Design goals:
63
+ // Skip over big stretches of HTML tags
64
+ // Able to return ranges of different languages
65
+ // Relatively small tables and relatively fast processing
66
+ // Thread safe
67
+ //
68
+ // For HTML documents, tags are skipped, along with <script> ... </script>
69
+ // and <style> ... </style> sequences, and entities are expanded.
70
+ //
71
+ // We distinguish between bytes of the raw input buffer and bytes of non-tag
72
+ // text letters. Since tags can be over 50% of the bytes of an HTML Page,
73
+ // and are nearly all seven-bit ASCII English, we prefer to distinguish
74
+ // language mixture fractions based on just the non-tag text.
75
+ //
76
+ // Inputs: buffer and buffer_length
77
+ // Code skips HTML tags and expands HTML entities, unless
78
+ // is_plain_text is true
79
+ // Outputs:
80
+ // language3 is an array of the top 3 languages or UNKNOWN_LANGUAGE
81
+ // percent3 is an array of the text percentages 0..100 of the top 3 languages
82
+ // text_bytes is the amount of non-tag/letters-only text found
83
+ // is_reliable set true if the returned Language is some amount more
84
+ // probable than the second-best Language. Calculation is a complex function
85
+ // of the length of the text and the different-script runs of text.
86
+ // Return value: the most likely Language for the majority of the input text
87
+ // Length 0 input returns UNKNOWN_LANGUAGE.
88
+ //
89
+ // Subsetting: For fast detection over large documents, these routines will
90
+ // scan non-tag text of the initial part of a document, then will
91
+ // skip 4-16 bytes and subsample text in the rest of the document, up to a
92
+ // fixed limit (currently 160KB of non-tag letters).
93
+ //
94
+
95
+ struct DetectionTables {
96
+ const cld::CLDTableSummary* quadgram_obj;
97
+ const UTF8PropObj* unigram_obj;
98
+ };
99
+
100
+ // Scan interchange-valid UTF-8 bytes and detect list of top 3 languages.
101
+ //
102
+ // Accepts hints to bias language priors.
103
+ //
104
+ // Extended languages are additional Google interface languages and Unicode
105
+ // single-language scripts, from ext_lang_enc.h. They are experimental and
106
+ // this call may be removed.
107
+ //
108
+ // Returns internal language scores as a ratio to
109
+ // normal score for real text in that language. Scores close to 1.0 indicate
110
+ // normal text, while scores far away from 1.0 indicate badly-skewed text or
111
+ // gibberish
112
+ //
113
+ // If do_pick_summary_language is true then CLD will sometimes
114
+ // not pick the top-scoring language; see CalcSummaryLang
115
+ // in compact_lang_det_impl.cc. If it's false then the
116
+ // top language is always returned.
117
+ //
118
+ // If do_remove_weak_matches is true then CLD will delete
119
+ // poor scoring languages from the results, so that if a
120
+ // language is returned there is some confidence it is
121
+ // correct.
122
+ //
123
+ Language DetectLanguage(
124
+ const DetectionTables* tables,
125
+ const char* buffer,
126
+ int buffer_length,
127
+ bool is_plain_text,
128
+ bool do_allow_extended_languages,
129
+ bool do_pick_summary_language,
130
+ bool do_remove_weak_matches,
131
+ const char* tld_hint, // "id" boosts Indonesian
132
+ int encoding_hint, // SJS boosts Japanese
133
+ Language language_hint, // ITALIAN boosts it
134
+ Language* language3,
135
+ int* percent3,
136
+ double* normalized_score3,
137
+ int* text_bytes,
138
+ bool* is_reliable);
139
+
140
+ // Return version text string
141
+ // String is "code_version - data_scrape_date"
142
+ const char* DetectLanguageVersion();
143
+ }; // End namespace CompactLangDet
144
+
145
+ #endif // ENCODINGS_COMPACT_LANG_DET_COMPACT_LANG_DET_H_