cld3 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +18 -0
  3. data/LICENSE +204 -0
  4. data/LICENSE_CLD3 +203 -0
  5. data/README.md +22 -0
  6. data/cld3.gemspec +35 -0
  7. data/ext/cld3/base.cc +36 -0
  8. data/ext/cld3/base.h +106 -0
  9. data/ext/cld3/casts.h +98 -0
  10. data/ext/cld3/embedding_feature_extractor.cc +51 -0
  11. data/ext/cld3/embedding_feature_extractor.h +182 -0
  12. data/ext/cld3/embedding_network.cc +196 -0
  13. data/ext/cld3/embedding_network.h +186 -0
  14. data/ext/cld3/embedding_network_params.h +285 -0
  15. data/ext/cld3/extconf.rb +49 -0
  16. data/ext/cld3/feature_extractor.cc +137 -0
  17. data/ext/cld3/feature_extractor.h +633 -0
  18. data/ext/cld3/feature_extractor.proto +50 -0
  19. data/ext/cld3/feature_types.cc +72 -0
  20. data/ext/cld3/feature_types.h +158 -0
  21. data/ext/cld3/fixunicodevalue.cc +55 -0
  22. data/ext/cld3/fixunicodevalue.h +69 -0
  23. data/ext/cld3/float16.h +58 -0
  24. data/ext/cld3/fml_parser.cc +308 -0
  25. data/ext/cld3/fml_parser.h +123 -0
  26. data/ext/cld3/generated_entities.cc +296 -0
  27. data/ext/cld3/generated_ulscript.cc +678 -0
  28. data/ext/cld3/generated_ulscript.h +142 -0
  29. data/ext/cld3/getonescriptspan.cc +1109 -0
  30. data/ext/cld3/getonescriptspan.h +124 -0
  31. data/ext/cld3/integral_types.h +37 -0
  32. data/ext/cld3/lang_id_nn_params.cc +57449 -0
  33. data/ext/cld3/lang_id_nn_params.h +178 -0
  34. data/ext/cld3/language_identifier_features.cc +165 -0
  35. data/ext/cld3/language_identifier_features.h +116 -0
  36. data/ext/cld3/nnet_language_identifier.cc +380 -0
  37. data/ext/cld3/nnet_language_identifier.h +175 -0
  38. data/ext/cld3/nnet_language_identifier_c.cc +72 -0
  39. data/ext/cld3/offsetmap.cc +478 -0
  40. data/ext/cld3/offsetmap.h +168 -0
  41. data/ext/cld3/port.h +143 -0
  42. data/ext/cld3/registry.cc +28 -0
  43. data/ext/cld3/registry.h +242 -0
  44. data/ext/cld3/relevant_script_feature.cc +89 -0
  45. data/ext/cld3/relevant_script_feature.h +49 -0
  46. data/ext/cld3/script_detector.h +156 -0
  47. data/ext/cld3/sentence.proto +77 -0
  48. data/ext/cld3/sentence_features.cc +29 -0
  49. data/ext/cld3/sentence_features.h +35 -0
  50. data/ext/cld3/simple_adder.h +72 -0
  51. data/ext/cld3/stringpiece.h +81 -0
  52. data/ext/cld3/task_context.cc +161 -0
  53. data/ext/cld3/task_context.h +81 -0
  54. data/ext/cld3/task_context_params.cc +74 -0
  55. data/ext/cld3/task_context_params.h +54 -0
  56. data/ext/cld3/task_spec.proto +98 -0
  57. data/ext/cld3/text_processing.cc +245 -0
  58. data/ext/cld3/text_processing.h +30 -0
  59. data/ext/cld3/unicodetext.cc +96 -0
  60. data/ext/cld3/unicodetext.h +144 -0
  61. data/ext/cld3/utf8acceptinterchange.h +486 -0
  62. data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
  63. data/ext/cld3/utf8repl_lettermarklower.h +758 -0
  64. data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
  65. data/ext/cld3/utf8statetable.cc +1344 -0
  66. data/ext/cld3/utf8statetable.h +285 -0
  67. data/ext/cld3/utils.cc +241 -0
  68. data/ext/cld3/utils.h +144 -0
  69. data/ext/cld3/workspace.cc +64 -0
  70. data/ext/cld3/workspace.h +177 -0
  71. data/lib/cld3.rb +99 -0
  72. metadata +158 -0
@@ -0,0 +1,178 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef LANG_ID_NN_PARAMS_H_
17
+ #define LANG_ID_NN_PARAMS_H_
18
+
19
+ #include "base.h"
20
+ #include "embedding_network_params.h"
21
+ #include "float16.h"
22
+
23
+ namespace chrome_lang_id {
24
+
25
+ class LangIdNNParams : public EmbeddingNetworkParams {
26
+ public:
27
+ ~LangIdNNParams() override {}
28
+
29
+ // Access methods for embeddings:
30
+ int embeddings_size() const override { return 6; }
31
+ int embeddings_num_rows(int i) const override {
32
+ return kEmbeddingsNumRows[i];
33
+ }
34
+ int embeddings_num_cols(int i) const override {
35
+ return kEmbeddingsNumCols[i];
36
+ }
37
+ const void *embeddings_weights(int i) const override {
38
+ return embeddings_weights_[i];
39
+ }
40
+ QuantizationType embeddings_quant_type(int i) const override {
41
+ return QuantizationType::UINT8;
42
+ }
43
+ const float16 *embeddings_quant_scales(int i) const override {
44
+ return embeddings_quant_scales_[i];
45
+ }
46
+
47
+ // Access methods for hidden:
48
+ int hidden_size() const override { return 1; }
49
+ int hidden_num_rows(int i) const override { return kHiddenNumRows[i]; }
50
+ int hidden_num_cols(int i) const override { return kHiddenNumCols[i]; }
51
+ const void *hidden_weights(int i) const override {
52
+ return hidden_weights_[i];
53
+ }
54
+
55
+ // Access methods for hidden_bias:
56
+ int hidden_bias_size() const override { return 1; }
57
+ int hidden_bias_num_rows(int i) const override {
58
+ return kHiddenBiasNumRows[i];
59
+ }
60
+ int hidden_bias_num_cols(int i) const override {
61
+ return kHiddenBiasNumCols[i];
62
+ }
63
+ const void *hidden_bias_weights(int i) const override {
64
+ return hidden_bias_weights_[i];
65
+ }
66
+
67
+ // Access methods for softmax:
68
+ int softmax_size() const override { return 1; }
69
+ int softmax_num_rows(int i) const override { return kSoftmaxNumRows[i]; }
70
+ int softmax_num_cols(int i) const override { return kSoftmaxNumCols[i]; }
71
+ const void *softmax_weights(int i) const override {
72
+ return softmax_weights_[i];
73
+ }
74
+
75
+ // Access methods for softmax_bias:
76
+ int softmax_bias_size() const override { return 1; }
77
+ int softmax_bias_num_rows(int i) const override {
78
+ return kSoftmaxBiasNumRows[i];
79
+ }
80
+ int softmax_bias_num_cols(int i) const override {
81
+ return kSoftmaxBiasNumCols[i];
82
+ }
83
+ const void *softmax_bias_weights(int i) const override {
84
+ return softmax_bias_weights_[i];
85
+ }
86
+
87
+ // Access methods for embedding_dim:
88
+ int embedding_dim_size() const override { return 6; }
89
+ int32 embedding_dim(int i) const override { return kEmbeddingDimValues[i]; }
90
+
91
+ // Access methods for embedding_num_features:
92
+ int embedding_num_features_size() const override { return 6; }
93
+ int32 embedding_num_features(int i) const override {
94
+ return kEmbeddingNumFeaturesValues[i];
95
+ }
96
+
97
+ // Access methods for embedding_features_domain_size:
98
+ int embedding_features_domain_size_size() const override { return 6; }
99
+ int32 embedding_features_domain_size(int i) const override {
100
+ return kEmbeddingFeaturesDomainSizeValues[i];
101
+ }
102
+
103
+ // Access methods for concat_offset:
104
+ int concat_offset_size() const override { return 6; }
105
+ int32 concat_offset(int i) const override { return kConcatOffsetValues[i]; }
106
+
107
+ // Access methods for concat_layer_size:
108
+ bool has_concat_layer_size() const override { return true; }
109
+ int32 concat_layer_size() const override { return 80; }
110
+
111
+ // Access methods for is_precomputed:
112
+ bool has_is_precomputed() const override { return false; }
113
+ bool is_precomputed() const override { return false; }
114
+
115
+ private:
116
+ // Private fields for embeddings:
117
+ static const int kEmbeddingsNumRows[];
118
+ static const int kEmbeddingsNumCols[];
119
+ static const uint8 kEmbeddingsWeights0[];
120
+ static const uint8 kEmbeddingsWeights1[];
121
+ static const uint8 kEmbeddingsWeights2[];
122
+ static const uint8 kEmbeddingsWeights3[];
123
+ static const uint8 kEmbeddingsWeights4[];
124
+ static const uint8 kEmbeddingsWeights5[];
125
+ const void *embeddings_weights_[6] = {
126
+ kEmbeddingsWeights0, kEmbeddingsWeights1, kEmbeddingsWeights2,
127
+ kEmbeddingsWeights3, kEmbeddingsWeights4, kEmbeddingsWeights5};
128
+ static const float16 kEmbeddingsQuantScales0[];
129
+ static const float16 kEmbeddingsQuantScales1[];
130
+ static const float16 kEmbeddingsQuantScales2[];
131
+ static const float16 kEmbeddingsQuantScales3[];
132
+ static const float16 kEmbeddingsQuantScales4[];
133
+ static const float16 kEmbeddingsQuantScales5[];
134
+ const float16 *embeddings_quant_scales_[6] = {
135
+ kEmbeddingsQuantScales0, kEmbeddingsQuantScales1,
136
+ kEmbeddingsQuantScales2, kEmbeddingsQuantScales3,
137
+ kEmbeddingsQuantScales4, kEmbeddingsQuantScales5};
138
+
139
+ // Private fields for hidden:
140
+ static const int kHiddenNumRows[];
141
+ static const int kHiddenNumCols[];
142
+ static const float kHiddenWeights0[];
143
+ const void *hidden_weights_[1] = {kHiddenWeights0};
144
+
145
+ // Private fields for hidden_bias:
146
+ static const int kHiddenBiasNumRows[];
147
+ static const int kHiddenBiasNumCols[];
148
+ static const float kHiddenBiasWeights0[];
149
+ const void *hidden_bias_weights_[1] = {kHiddenBiasWeights0};
150
+
151
+ // Private fields for softmax:
152
+ static const int kSoftmaxNumRows[];
153
+ static const int kSoftmaxNumCols[];
154
+ static const float kSoftmaxWeights0[];
155
+ const void *softmax_weights_[1] = {kSoftmaxWeights0};
156
+
157
+ // Private fields for softmax_bias:
158
+ static const int kSoftmaxBiasNumRows[];
159
+ static const int kSoftmaxBiasNumCols[];
160
+ static const float kSoftmaxBiasWeights0[];
161
+ const void *softmax_bias_weights_[1] = {kSoftmaxBiasWeights0};
162
+
163
+ // Private fields for embedding_dim:
164
+ static const int32 kEmbeddingDimValues[];
165
+
166
+ // Private fields for embedding_num_features:
167
+ static const int32 kEmbeddingNumFeaturesValues[];
168
+
169
+ // Private fields for embedding_features_domain_size:
170
+ static const int32 kEmbeddingFeaturesDomainSizeValues[];
171
+
172
+ // Private fields for concat_offset:
173
+ static const int32 kConcatOffsetValues[];
174
+ }; // class LangIdNNParams
175
+
176
+ } // namespace chrome_lang_id
177
+
178
+ #endif // LANG_ID_NN_PARAMS_H_
@@ -0,0 +1,165 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #include "language_identifier_features.h"
17
+
18
+ #include <sstream>
19
+ #include <unordered_map>
20
+ #include <utility>
21
+ #include <vector>
22
+
23
+ #include "base.h"
24
+ #include "feature_extractor.h"
25
+ #include "feature_types.h"
26
+ #include "script_span/generated_ulscript.h"
27
+ #include "script_span/getonescriptspan.h"
28
+ #include "sentence_features.h"
29
+ #include "task_context.h"
30
+ #include "unicodetext.h"
31
+ #include "utils.h"
32
+
33
+ namespace chrome_lang_id {
34
+ NumericFeatureType::NumericFeatureType(const string &name, FeatureValue size)
35
+ : FeatureType(name), size_(size) {}
36
+
37
+ string NumericFeatureType::GetFeatureValueName(FeatureValue value) const {
38
+ return value < 0 ? "" : Int64ToString(value);
39
+ }
40
+
41
+ FeatureValue NumericFeatureType::GetDomainSize() const { return size_; }
42
+
43
+ void ContinuousBagOfNgramsFunction::Setup(TaskContext *context) {
44
+ // Parameters in the feature function descriptor.
45
+ include_terminators_ = GetBoolParameter("include_terminators", false);
46
+ include_spaces_ = GetBoolParameter("include_spaces", false);
47
+ use_equal_ngram_weight_ = GetBoolParameter("use_equal_weight", false);
48
+ ngram_id_dimension_ = GetIntParameter("id_dim", 10000);
49
+ ngram_size_ = GetIntParameter("size", 3);
50
+ }
51
+
52
+ void ContinuousBagOfNgramsFunction::Init(TaskContext *context) {
53
+ set_feature_type(new NumericFeatureType(name(), ngram_id_dimension_));
54
+ }
55
+
56
+ void ContinuousBagOfNgramsFunction::Evaluate(const WorkspaceSet &workspaces,
57
+ const Sentence &sentence,
58
+ FeatureVector *result) const {
59
+ // Include terminators for each token. Tokens are discovered by splitting the
60
+ // text on spaces.
61
+ std::vector<string> chars;
62
+ utils::GetUTF8Chars(sentence.text(), &chars);
63
+ if (include_terminators_) {
64
+ std::vector<string> new_chars{"^"};
65
+ for (size_t index = 0; index < chars.size(); ++index) {
66
+ if (chars.at(index) == " ") {
67
+ new_chars.push_back("$");
68
+ new_chars.push_back(" ");
69
+ new_chars.push_back("^");
70
+ } else {
71
+ new_chars.push_back(chars.at(index));
72
+ }
73
+ }
74
+ new_chars.push_back("$");
75
+ chars.swap(new_chars);
76
+ }
77
+
78
+ // Find the char ngram counts.
79
+ std::unordered_map<string, int> char_ngram_counts;
80
+ int count_sum = 0;
81
+ for (int start = 0; start <= static_cast<int>(chars.size()) - ngram_size_;
82
+ ++start) {
83
+ string char_ngram;
84
+ int index;
85
+ for (index = 0; index < ngram_size_; ++index) {
86
+ const string &current_char = chars.at(start + index);
87
+ if (current_char == " " && !include_spaces_) {
88
+ break;
89
+ }
90
+ char_ngram.append(current_char);
91
+ }
92
+ if (index == ngram_size_) {
93
+ char_ngram_counts[char_ngram]++;
94
+ ++count_sum;
95
+ }
96
+ }
97
+
98
+ // Populate the feature vector.
99
+ const float equal_weight = 1.0 / char_ngram_counts.size();
100
+ const float norm = static_cast<float>(count_sum);
101
+ for (const auto &ngram_and_count : char_ngram_counts) {
102
+ const float weight =
103
+ use_equal_ngram_weight_ ? equal_weight : ngram_and_count.second / norm;
104
+ FloatFeatureValue value(
105
+ utils::Hash32WithDefaultSeed(ngram_and_count.first) %
106
+ ngram_id_dimension_,
107
+ weight);
108
+ result->add(feature_type(), value.discrete_value);
109
+ }
110
+ }
111
+
112
+ FeatureValue ScriptFeature::Compute(const WorkspaceSet &workspaces,
113
+ const Sentence &sentence,
114
+ const FeatureVector *result) const {
115
+ const string &text = sentence.text();
116
+ CLD2::ScriptScanner ss(text.c_str(), text.size(),
117
+ /*is_plain_text=*/true);
118
+
119
+ // GetOneScriptSpan() is called only once because of the assumption that the
120
+ // input contains one script. This function also cleans up the input (e.g.,
121
+ // removes digits, punctuation).
122
+ // TODO(abakalov): Extract the clean-up and script detection code out of
123
+ // GetOneScriptSpan() because we don't have to iterate over the whole text,
124
+ // just look at the first codepoint after clean-up.
125
+ CLD2::LangSpan script_span;
126
+ ss.GetOneScriptSpan(&script_span);
127
+ const CLD2::ULScript ulscript = script_span.ulscript;
128
+ if (ulscript != CLD2::ULScript_Hani) {
129
+ return ulscript;
130
+ } else {
131
+ // Out of the codepoints captured by ULScript_Hani, separately count those
132
+ // in Hangul (Korean script) and those in a script other than Hangul.
133
+ int num_hangul = 0;
134
+ int num_non_hangul = 0;
135
+ UnicodeText unicode_text;
136
+ unicode_text.PointToUTF8(script_span.text, script_span.text_bytes);
137
+ for (chrome_lang_id::char32 codepoint : unicode_text) {
138
+ // If the current codepoint is space, continue.
139
+ if (codepoint == 0x20) {
140
+ continue;
141
+ }
142
+
143
+ // Check if the current codepoint is within the ranges associated with
144
+ // Hangul.
145
+ if ((codepoint >= 0x1100 && codepoint <= 0x11FF) || // Hangul Jamo
146
+ (codepoint >= 0xA960 && codepoint <= 0xA97F) || // Jamo Extended A
147
+ (codepoint >= 0xD7B0 && codepoint <= 0xD7FF) || // Jamo Extended B
148
+ (codepoint >= 0x3130 && codepoint <= 0x318F) || // Compatibility Jamo
149
+ (codepoint >= 0xFFA0 && codepoint <= 0xFFDC) || // Halfwidth Jamo
150
+ (codepoint >= 0xAC00 && codepoint <= 0xD7AF)) { // Hangul Syllables
151
+ num_hangul++;
152
+ } else {
153
+ num_non_hangul++;
154
+ }
155
+ }
156
+
157
+ if (num_hangul > num_non_hangul) {
158
+ return static_cast<FeatureValue>(CLD2::NUM_ULSCRIPTS);
159
+ } else {
160
+ return static_cast<FeatureValue>(CLD2::ULScript_Hani);
161
+ }
162
+ }
163
+ }
164
+
165
+ } // namespace chrome_lang_id
@@ -0,0 +1,116 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef LANGUAGE_IDENTIFIER_FEATURES_H_
17
+ #define LANGUAGE_IDENTIFIER_FEATURES_H_
18
+
19
+ #include <string>
20
+
21
+ #include "feature_extractor.h"
22
+ #include "feature_types.h"
23
+ #include "script_span/generated_ulscript.h"
24
+ #include "cld_3/protos/sentence.pb.h"
25
+ #include "sentence_features.h"
26
+ #include "task_context.h"
27
+ #include "workspace.h"
28
+
29
+ namespace chrome_lang_id {
30
+
31
+ // Feature type for numeric features.
32
+ class NumericFeatureType : public FeatureType {
33
+ public:
34
+ // Initializes numeric feature.
35
+ NumericFeatureType(const string &name, FeatureValue size);
36
+
37
+ // Returns numeric feature value.
38
+ string GetFeatureValueName(FeatureValue value) const override;
39
+
40
+ // Returns the number of feature values.
41
+ FeatureValue GetDomainSize() const override;
42
+
43
+ private:
44
+ FeatureValue size_;
45
+ };
46
+
47
+ // Class for computing continuous char ngram features.
48
+ // Feature function descriptor parameters:
49
+ // include_terminators(bool, false):
50
+ // If 'true', then splits the text based on spaces to get tokens, adds "^"
51
+ // to the beginning of each token, and adds "$" to the end of each token.
52
+ // include_spaces(bool, false):
53
+ // If 'true', then includes char ngrams containing spaces.
54
+ // use_equal_weight(bool, false):
55
+ // If 'true', then weighs each unique ngram by 1.0 / (number of unique
56
+ // ngrams in the input). Otherwise, weighs each unique ngram by (ngram
57
+ // count) / (total number of ngrams).
58
+ // id_dim(int, 10000):
59
+ // The integer id of each char ngram is computed as follows:
60
+ // Hash32WithDefaultSeed(char ngram) % id_dim.
61
+ // size(int, 3):
62
+ // Only ngrams of this size will be extracted.
63
+ class ContinuousBagOfNgramsFunction : public WholeSentenceFeature {
64
+ public:
65
+ void Setup(TaskContext *context) override;
66
+ void Init(TaskContext *context) override;
67
+
68
+ // Appends the features computed from the focus to the feature vector.
69
+ void Evaluate(const WorkspaceSet &workspaces, const Sentence &sentence,
70
+ FeatureVector *result) const override;
71
+
72
+ private:
73
+ // If 'true', then splits the text based on spaces to get tokens, adds "^" to
74
+ // the beginning of each token, and adds "$" to the end of each token.
75
+ bool include_terminators_;
76
+
77
+ // If 'true', then includes char ngrams containing spaces.
78
+ bool include_spaces_;
79
+
80
+ // If 'true', then weighs each unique ngram by 1.0 / (number of unique ngrams
81
+ // in the input). Otherwise, weighs each unique ngram by (ngram count) /
82
+ // (total number of ngrams).
83
+ bool use_equal_ngram_weight_;
84
+
85
+ // The integer id of each char ngram is computed as follows:
86
+ // Hash32WithDefaultSeed(char_ngram) % ngram_id_dimension_.
87
+ int ngram_id_dimension_;
88
+
89
+ // Only ngrams of size ngram_size_ will be extracted.
90
+ int ngram_size_;
91
+ };
92
+
93
+ // Class for detecting the script of a piece of text. The list of supported
94
+ // scripts is chrome_lang_id::CLD2::ULScript. This class uses the script
95
+ // recognition code ported from CLD2. ULScript_Hani is split into non-Korean
96
+ // script and Korean script (Hangul). In the former case, the function emits
97
+ // ULScript_Hani. In the latter case, the function emits NUM_ULSCRIPTS. The
98
+ // class assumes that the input is (1) interchange valid UTF8, and (2) contains
99
+ // only one chrome_lang_id::CLD2::ULScript.
100
+ class ScriptFeature : public WholeSentenceFeature {
101
+ public:
102
+ void Init(TaskContext *context) override {
103
+ // The dimension is incremented by 1 because ULScript_Hani is split into two
104
+ // as mentioned in the class description.
105
+ set_feature_type(new NumericFeatureType(
106
+ name(), chrome_lang_id::CLD2::NUM_ULSCRIPTS + 1));
107
+ }
108
+
109
+ // Computes the feature and saves it in the feature vector.
110
+ FeatureValue Compute(const WorkspaceSet &workspaces, const Sentence &sentence,
111
+ const FeatureVector *result) const override;
112
+ };
113
+
114
+ } // namespace chrome_lang_id
115
+
116
+ #endif // LANGUAGE_IDENTIFIER_FEATURES_H_