cld3 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +18 -0
- data/LICENSE +204 -0
- data/LICENSE_CLD3 +203 -0
- data/README.md +22 -0
- data/cld3.gemspec +35 -0
- data/ext/cld3/base.cc +36 -0
- data/ext/cld3/base.h +106 -0
- data/ext/cld3/casts.h +98 -0
- data/ext/cld3/embedding_feature_extractor.cc +51 -0
- data/ext/cld3/embedding_feature_extractor.h +182 -0
- data/ext/cld3/embedding_network.cc +196 -0
- data/ext/cld3/embedding_network.h +186 -0
- data/ext/cld3/embedding_network_params.h +285 -0
- data/ext/cld3/extconf.rb +49 -0
- data/ext/cld3/feature_extractor.cc +137 -0
- data/ext/cld3/feature_extractor.h +633 -0
- data/ext/cld3/feature_extractor.proto +50 -0
- data/ext/cld3/feature_types.cc +72 -0
- data/ext/cld3/feature_types.h +158 -0
- data/ext/cld3/fixunicodevalue.cc +55 -0
- data/ext/cld3/fixunicodevalue.h +69 -0
- data/ext/cld3/float16.h +58 -0
- data/ext/cld3/fml_parser.cc +308 -0
- data/ext/cld3/fml_parser.h +123 -0
- data/ext/cld3/generated_entities.cc +296 -0
- data/ext/cld3/generated_ulscript.cc +678 -0
- data/ext/cld3/generated_ulscript.h +142 -0
- data/ext/cld3/getonescriptspan.cc +1109 -0
- data/ext/cld3/getonescriptspan.h +124 -0
- data/ext/cld3/integral_types.h +37 -0
- data/ext/cld3/lang_id_nn_params.cc +57449 -0
- data/ext/cld3/lang_id_nn_params.h +178 -0
- data/ext/cld3/language_identifier_features.cc +165 -0
- data/ext/cld3/language_identifier_features.h +116 -0
- data/ext/cld3/nnet_language_identifier.cc +380 -0
- data/ext/cld3/nnet_language_identifier.h +175 -0
- data/ext/cld3/nnet_language_identifier_c.cc +72 -0
- data/ext/cld3/offsetmap.cc +478 -0
- data/ext/cld3/offsetmap.h +168 -0
- data/ext/cld3/port.h +143 -0
- data/ext/cld3/registry.cc +28 -0
- data/ext/cld3/registry.h +242 -0
- data/ext/cld3/relevant_script_feature.cc +89 -0
- data/ext/cld3/relevant_script_feature.h +49 -0
- data/ext/cld3/script_detector.h +156 -0
- data/ext/cld3/sentence.proto +77 -0
- data/ext/cld3/sentence_features.cc +29 -0
- data/ext/cld3/sentence_features.h +35 -0
- data/ext/cld3/simple_adder.h +72 -0
- data/ext/cld3/stringpiece.h +81 -0
- data/ext/cld3/task_context.cc +161 -0
- data/ext/cld3/task_context.h +81 -0
- data/ext/cld3/task_context_params.cc +74 -0
- data/ext/cld3/task_context_params.h +54 -0
- data/ext/cld3/task_spec.proto +98 -0
- data/ext/cld3/text_processing.cc +245 -0
- data/ext/cld3/text_processing.h +30 -0
- data/ext/cld3/unicodetext.cc +96 -0
- data/ext/cld3/unicodetext.h +144 -0
- data/ext/cld3/utf8acceptinterchange.h +486 -0
- data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
- data/ext/cld3/utf8repl_lettermarklower.h +758 -0
- data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
- data/ext/cld3/utf8statetable.cc +1344 -0
- data/ext/cld3/utf8statetable.h +285 -0
- data/ext/cld3/utils.cc +241 -0
- data/ext/cld3/utils.h +144 -0
- data/ext/cld3/workspace.cc +64 -0
- data/ext/cld3/workspace.h +177 -0
- data/lib/cld3.rb +99 -0
- metadata +158 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
|
15
|
+
//
|
16
|
+
// A StringPiece points to part or all of a string, double-quoted string
|
17
|
+
// literal, or other string-like object. A StringPiece does *not* own the
|
18
|
+
// string to which it points. A StringPiece is not null-terminated. [subset]
|
19
|
+
//
|
20
|
+
|
21
|
+
#ifndef SCRIPT_SPAN_STRINGPIECE_H_
|
22
|
+
#define SCRIPT_SPAN_STRINGPIECE_H_
|
23
|
+
|
24
|
+
#include <string.h>
|
25
|
+
#include <string>
|
26
|
+
|
27
|
+
namespace chrome_lang_id {
|
28
|
+
|
29
|
+
// Signed type used for every StringPiece length and offset.
using stringpiece_ssize_type = int;

// A non-owning view of a run of characters. The viewed bytes are neither
// copied nor freed by this class and need not be NUL-terminated.
class StringPiece {
 public:
  // Builds an empty piece: null data pointer, zero length.
  StringPiece() : ptr_(nullptr), length_(0) {}

  // Implicit on purpose so a "const char*" can be passed wherever a
  // StringPiece is expected. A null pointer yields an empty piece; otherwise
  // the length is taken from strlen().
  StringPiece(const char* str)  // NOLINT(runtime/explicit)
      : ptr_(str),
        length_(str == nullptr
                    ? 0
                    : static_cast<stringpiece_ssize_type>(strlen(str))) {}

  // Implicit on purpose so a std::string can be passed wherever a StringPiece
  // is expected. Views the string's own buffer; nothing is copied.
  StringPiece(const std::string& str)  // NOLINT(runtime/explicit)
      : ptr_(str.data()),
        length_(static_cast<stringpiece_ssize_type>(str.size())) {}

  // Views exactly `len` bytes starting at `offset`.
  StringPiece(const char* offset, stringpiece_ssize_type len)
      : ptr_(offset), length_(len) {}

  // Drops the first `n` bytes from the view. No bounds check is performed.
  void remove_prefix(stringpiece_ssize_type n) {
    ptr_ += n;
    length_ -= n;
  }

  // Drops the last `n` bytes from the view. No bounds check is performed.
  void remove_suffix(stringpiece_ssize_type n) { length_ -= n; }

  // Start of the viewed bytes. The buffer may contain embedded NULs and may
  // lack a terminating NUL, so do not hand it to routines that expect a
  // C string.
  const char* data() const { return ptr_; }

  // Number of viewed bytes; size() and length() are synonyms.
  stringpiece_ssize_type size() const { return length_; }
  stringpiece_ssize_type length() const { return length_; }

  // True when the view covers zero bytes.
  bool empty() const { return length_ == 0; }

 private:
  const char* ptr_;                // First viewed byte (may be null).
  stringpiece_ssize_type length_;  // Number of viewed bytes.
};

class StringPiece;
|
78
|
+
|
79
|
+
} // namespace chrome_lang_id
|
80
|
+
|
81
|
+
#endif // SCRIPT_SPAN_STRINGPIECE_H_
|
@@ -0,0 +1,161 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#include "task_context.h"
|
17
|
+
|
18
|
+
#include "utils.h"
|
19
|
+
|
20
|
+
namespace chrome_lang_id {
|
21
|
+
|
22
|
+
// Nothing to set up beyond default-constructing the members.
TaskContext::TaskContext() = default;
|
23
|
+
|
24
|
+
// Nothing to release beyond what the members clean up themselves.
TaskContext::~TaskContext() = default;
|
25
|
+
|
26
|
+
// Returns the input descriptor called `name` from the task spec. The first
// descriptor with a matching name wins; when none matches, a new descriptor
// is appended to the spec, given that name, and returned.
TaskInput *TaskContext::GetInput(const string &name) {
  // Scan for the first descriptor that already carries this name.
  const int num_inputs = spec_.input_size();
  int index = 0;
  while (index < num_inputs && spec_.input(index).name() != name) {
    ++index;
  }
  if (index < num_inputs) {
    return spec_.mutable_input(index);
  }

  // No match: append a fresh descriptor and label it.
  TaskInput *created = spec_.add_input();
  created->set_name(name);
  return created;
}
|
37
|
+
|
38
|
+
// Returns the input descriptor called `name` (created on demand by the
// single-argument overload) and makes sure it lists `file_format` and
// `record_format`. An empty format string is ignored; a format that is
// already listed is not added a second time.
TaskInput *TaskContext::GetInput(const string &name, const string &file_format,
                                 const string &record_format) {
  TaskInput *input = GetInput(name);

  if (!file_format.empty()) {
    // Stop at the first matching entry; reaching the end means "not listed".
    const int num_file_formats = input->file_format_size();
    int pos = 0;
    while (pos < num_file_formats && input->file_format(pos) != file_format) {
      ++pos;
    }
    if (pos == num_file_formats) {
      input->add_file_format(file_format);
    }
  }

  if (!record_format.empty()) {
    const int num_record_formats = input->record_format_size();
    int pos = 0;
    while (pos < num_record_formats &&
           input->record_format(pos) != record_format) {
      ++pos;
    }
    if (pos == num_record_formats) {
      input->add_record_format(record_format);
    }
  }

  return input;
}
|
57
|
+
|
58
|
+
void TaskContext::SetParameter(const string &name, const string &value) {
|
59
|
+
// If the parameter already exists update the value.
|
60
|
+
for (int i = 0; i < spec_.parameter_size(); ++i) {
|
61
|
+
if (spec_.parameter(i).name() == name) {
|
62
|
+
spec_.mutable_parameter(i)->set_value(value);
|
63
|
+
return;
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
// Add new parameter.
|
68
|
+
TaskSpec::Parameter *param = spec_.add_parameter();
|
69
|
+
param->set_name(name);
|
70
|
+
param->set_value(value);
|
71
|
+
}
|
72
|
+
|
73
|
+
// Returns the value of the first task-spec parameter called `name`, or an
// empty string when the spec has no such parameter.
string TaskContext::GetParameter(const string &name) const {
  // The const char* overload of Get() performs the same first-match lookup
  // and falls back to the supplied default, here the empty string.
  return Get(name, "");
}
|
82
|
+
|
83
|
+
int TaskContext::GetIntParameter(const string &name) const {
|
84
|
+
string value = GetParameter(name);
|
85
|
+
return utils::ParseUsing<int>(value, 0, utils::ParseInt32);
|
86
|
+
}
|
87
|
+
|
88
|
+
bool TaskContext::GetBoolParameter(const string &name) const {
|
89
|
+
string value = GetParameter(name);
|
90
|
+
return value == "true";
|
91
|
+
}
|
92
|
+
|
93
|
+
double TaskContext::GetFloatParameter(const string &name) const {
|
94
|
+
string value = GetParameter(name);
|
95
|
+
return utils::ParseUsing<double>(value, .0, utils::ParseDouble);
|
96
|
+
}
|
97
|
+
|
98
|
+
// Returns the value of the first task-spec parameter called `name`. When the
// spec has no such parameter, returns a copy of `defval`; a null `defval` is
// treated as the empty string.
string TaskContext::Get(const string &name, const char *defval) const {
  // First try to find the parameter in the task specification.
  for (int i = 0; i < spec_.parameter_size(); ++i) {
    const TaskSpec::Parameter &param = spec_.parameter(i);
    if (param.name() == name) return param.value();
  }

  // Parameter not found: fall back to the default. Building a std::string
  // from a null pointer is undefined behaviour, so map null to "".
  return defval != nullptr ? string(defval) : string();
}
|
107
|
+
|
108
|
+
// Returns the value of the first task-spec parameter called `name`, or a copy
// of `defval` when the spec has no such parameter.
string TaskContext::Get(const string &name, const string &defval) const {
  // Look the parameter up directly rather than forwarding defval.c_str() to
  // the const char* overload: that round trip would cut the default off at
  // the first embedded NUL and cost an extra strlen and copy.
  for (int i = 0; i < spec_.parameter_size(); ++i) {
    const TaskSpec::Parameter &param = spec_.parameter(i);
    if (param.name() == name) return param.value();
  }

  // Parameter not found, return the default unchanged.
  return defval;
}
|
111
|
+
|
112
|
+
int TaskContext::Get(const string &name, int defval) const {
|
113
|
+
string value = Get(name, "");
|
114
|
+
return utils::ParseUsing<int>(value, defval, utils::ParseInt32);
|
115
|
+
}
|
116
|
+
|
117
|
+
double TaskContext::Get(const string &name, double defval) const {
|
118
|
+
string value = Get(name, "");
|
119
|
+
return utils::ParseUsing<double>(value, defval, utils::ParseDouble);
|
120
|
+
}
|
121
|
+
|
122
|
+
bool TaskContext::Get(const string &name, bool defval) const {
|
123
|
+
string value = Get(name, "");
|
124
|
+
return value.empty() ? defval : value == "true";
|
125
|
+
}
|
126
|
+
|
127
|
+
// Returns the file pattern of the first part of `input`. The input is
// checked (via CLD3_CHECK) to consist of exactly one part.
string TaskContext::InputFile(const TaskInput &input) {
  CLD3_CHECK(input.part_size() == 1);
  string pattern = input.part(0).file_pattern();
  return pattern;
}
|
131
|
+
|
132
|
+
bool TaskContext::Supports(const TaskInput &input, const string &file_format,
|
133
|
+
const string &record_format) {
|
134
|
+
// Check file format.
|
135
|
+
if (input.file_format_size() > 0) {
|
136
|
+
bool found = false;
|
137
|
+
for (int i = 0; i < input.file_format_size(); ++i) {
|
138
|
+
if (input.file_format(i) == file_format) {
|
139
|
+
found = true;
|
140
|
+
break;
|
141
|
+
}
|
142
|
+
}
|
143
|
+
if (!found) return false;
|
144
|
+
}
|
145
|
+
|
146
|
+
// Check record format.
|
147
|
+
if (input.record_format_size() > 0) {
|
148
|
+
bool found = false;
|
149
|
+
for (int i = 0; i < input.record_format_size(); ++i) {
|
150
|
+
if (input.record_format(i) == record_format) {
|
151
|
+
found = true;
|
152
|
+
break;
|
153
|
+
}
|
154
|
+
}
|
155
|
+
if (!found) return false;
|
156
|
+
}
|
157
|
+
|
158
|
+
return true;
|
159
|
+
}
|
160
|
+
|
161
|
+
} // namespace chrome_lang_id
|
@@ -0,0 +1,81 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#ifndef TASK_CONTEXT_H_
|
17
|
+
#define TASK_CONTEXT_H_
|
18
|
+
|
19
|
+
#include <string>
|
20
|
+
#include <vector>
|
21
|
+
|
22
|
+
#include "base.h"
|
23
|
+
#include "cld_3/protos/task_spec.pb.h"
|
24
|
+
|
25
|
+
namespace chrome_lang_id {
|
26
|
+
|
27
|
+
// A task context holds configuration information for a task. It is basically a
|
28
|
+
// wrapper around a TaskSpec protocol buffer.
|
29
|
+
class TaskContext {
|
30
|
+
public:
|
31
|
+
TaskContext();
|
32
|
+
~TaskContext();
|
33
|
+
|
34
|
+
// Returns the underlying task specification protocol buffer for the context.
|
35
|
+
const TaskSpec &spec() const { return spec_; }
|
36
|
+
TaskSpec *mutable_spec() { return &spec_; }
|
37
|
+
|
38
|
+
// Returns a named input descriptor for the task. A new input is created if
|
39
|
+
// the task context does not already have an input with that name.
|
40
|
+
TaskInput *GetInput(const string &name);
|
41
|
+
TaskInput *GetInput(const string &name, const string &file_format,
|
42
|
+
const string &record_format);
|
43
|
+
|
44
|
+
// Sets task parameter.
|
45
|
+
void SetParameter(const string &name, const string &value);
|
46
|
+
|
47
|
+
// Returns task parameter. If the parameter is not in the task configuration
|
48
|
+
// the (default) value of the corresponding command line flag is returned.
|
49
|
+
string GetParameter(const string &name) const;
|
50
|
+
int GetIntParameter(const string &name) const;
|
51
|
+
bool GetBoolParameter(const string &name) const;
|
52
|
+
double GetFloatParameter(const string &name) const;
|
53
|
+
|
54
|
+
// Returns task parameter. If the parameter is not in the task configuration
|
55
|
+
// the default value is returned. Parameters retrieved using these methods
|
56
|
+
// don't need to be defined with a DEFINE_*() macro.
|
57
|
+
string Get(const string &name, const string &defval) const;
|
58
|
+
string Get(const string &name, const char *defval) const;
|
59
|
+
int Get(const string &name, int defval) const;
|
60
|
+
double Get(const string &name, double defval) const;
|
61
|
+
bool Get(const string &name, bool defval) const;
|
62
|
+
|
63
|
+
// Returns input file name for a single-file task input.
|
64
|
+
static string InputFile(const TaskInput &input);
|
65
|
+
|
66
|
+
// Returns true if task input supports the file and record format.
|
67
|
+
static bool Supports(const TaskInput &input, const string &file_format,
|
68
|
+
const string &record_format);
|
69
|
+
|
70
|
+
private:
|
71
|
+
// Underlying task specification protocol buffer.
|
72
|
+
TaskSpec spec_;
|
73
|
+
|
74
|
+
// Vector of parameters required by this task. These must be specified in the
|
75
|
+
// task rather than relying on default values.
|
76
|
+
std::vector<string> required_parameters_;
|
77
|
+
};
|
78
|
+
|
79
|
+
} // namespace chrome_lang_id
|
80
|
+
|
81
|
+
#endif // TASK_CONTEXT_H_
|
@@ -0,0 +1,74 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
// This file contains the hard-coded parameters from the training workflow. If
|
17
|
+
// you update the binary model, you may need to update the variables below as
|
18
|
+
// well.
|
19
|
+
|
20
|
+
#include "task_context_params.h"
|
21
|
+
|
22
|
+
#include "task_context.h"
|
23
|
+
|
24
|
+
namespace chrome_lang_id {
|
25
|
+
|
26
|
+
// Copies the hard-coded model parameters (feature spec, embedding names and
// embedding dimensions) into `context` under their parameter names.
void TaskContextParams::ToTaskContext(TaskContext *context) {
  // One row per parameter, applied in the listed order.
  struct Setting {
    const char *key;
    const char *text;
  };
  const Setting settings[] = {
      {"language_identifier_features", kLanguageIdentifierFeatures},
      {"language_identifier_embedding_names",
       kLanguageIdentifierEmbeddingNames},
      {"language_identifier_embedding_dims", kLanguageIdentifierEmbeddingDims},
  };

  for (const Setting &setting : settings) {
    context->SetParameter(setting.key, setting.text);
  }
}
|
34
|
+
|
35
|
+
int TaskContextParams::GetNumLanguages() {
|
36
|
+
int i = 0;
|
37
|
+
while (kLanguageNames[i] != nullptr) {
|
38
|
+
i++;
|
39
|
+
}
|
40
|
+
return i;
|
41
|
+
}
|
42
|
+
|
43
|
+
// Language codes, ten per row, in their original order. The array is
// terminated by a nullptr sentinel, which GetNumLanguages() relies on.
const char *const TaskContextParams::kLanguageNames[] = {
    // 0-9
    "eo", "co", "eu", "ta", "de", "mt", "ps", "te", "su", "uz",
    // 10-19
    "zh-Latn", "ne", "nl", "sw", "sq", "hmn", "ja", "no", "mn", "so",
    // 20-29
    "ko", "kk", "sl", "ig", "mr", "th", "zu", "ml", "hr", "bs",
    // 30-39
    "lo", "sd", "cy", "hy", "uk", "pt", "lv", "iw", "cs", "vi",
    // 40-49
    "jv", "be", "km", "mk", "tr", "fy", "am", "zh", "da", "sv",
    // 50-59
    "fi", "ht", "af", "la", "id", "fil", "sm", "ca", "el", "ka",
    // 60-69
    "sr", "it", "sk", "ru", "ru-Latn", "bg", "ny", "fa", "haw", "gl",
    // 70-79
    "et", "ms", "gd", "bg-Latn", "ha", "is", "ur", "mi", "hi", "bn",
    // 80-89
    "hi-Latn", "fr", "yi", "hu", "xh", "my", "tg", "ro", "ar", "lb",
    // 90-99
    "el-Latn", "st", "ceb", "kn", "az", "si", "ky", "mg", "en", "gu",
    // 100-108
    "es", "pl", "ja-Latn", "ga", "lt", "sn", "yo", "pa", "ku",

    // Sentinel: the last element must be nullptr.
    nullptr,
};
|
58
|
+
|
59
|
+
// Feature specification in FML format: six ';'-separated feature functions,
// written here one per group of literals. The adjacent literals concatenate
// into a single string.
// NOTE(review): the entries presumably pair up positionally with
// kLanguageIdentifierEmbeddingNames / kLanguageIdentifierEmbeddingDims --
// confirm against the feature extractor.
const char TaskContextParams::kLanguageIdentifierFeatures[] =
    // n-grams of size 2
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=1000,size=2);"
    // n-grams of size 4
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=5000,size=4);"
    // relevant scripts
    "continuous-bag-of-relevant-scripts;"
    // script
    "script;"
    // n-grams of size 3
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=5000,size=3);"
    // n-grams of size 1
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=100,size=1)";
|
67
|
+
|
68
|
+
// Names of the six embedding spaces as one ';'-separated string; the adjacent
// literals below concatenate into that single string.
const char TaskContextParams::kLanguageIdentifierEmbeddingNames[] =
    "bigrams;"
    "quadgrams;"
    "relevant-scripts;"
    "text-script;"
    "trigrams;"
    "unigrams";
|
70
|
+
|
71
|
+
// Dimensions of the six embedding spaces as one ';'-separated string; the
// adjacent literals below concatenate into that single string.
const char TaskContextParams::kLanguageIdentifierEmbeddingDims[] =
    "16;"
    "16;"
    "8;"
    "8;"
    "16;"
    "16";
|
73
|
+
|
74
|
+
} // namespace chrome_lang_id
|
@@ -0,0 +1,54 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#ifndef TASK_CONTEXT_PARAMS_H_
|
17
|
+
#define TASK_CONTEXT_PARAMS_H_
|
18
|
+
|
19
|
+
#include <string>
|
20
|
+
|
21
|
+
#include "base.h"
|
22
|
+
#include "task_context.h"
|
23
|
+
|
24
|
+
namespace chrome_lang_id {
|
25
|
+
|
26
|
+
// Encapsulates the TaskContext specifying only the parameters for the model.
|
27
|
+
// The model weights are loaded statically.
|
28
|
+
class TaskContextParams {
|
29
|
+
public:
|
30
|
+
// Gets the name of the i'th language.
|
31
|
+
static const char *language_names(int i) { return kLanguageNames[i]; }
|
32
|
+
|
33
|
+
// Saves the parameters to the given TaskContext.
|
34
|
+
static void ToTaskContext(TaskContext *context);
|
35
|
+
|
36
|
+
// Gets the number of languages.
|
37
|
+
static int GetNumLanguages();
|
38
|
+
|
39
|
+
private:
|
40
|
+
// Names of all the languages.
|
41
|
+
static const char *const kLanguageNames[];
|
42
|
+
|
43
|
+
// Features in FML format.
|
44
|
+
static const char kLanguageIdentifierFeatures[];
|
45
|
+
|
46
|
+
// Names of the embedding spaces.
|
47
|
+
static const char kLanguageIdentifierEmbeddingNames[];
|
48
|
+
|
49
|
+
// Dimensions of the embedding spaces.
|
50
|
+
static const char kLanguageIdentifierEmbeddingDims[];
|
51
|
+
};
|
52
|
+
} // namespace chrome_lang_id
|
53
|
+
|
54
|
+
#endif // TASK_CONTEXT_PARAMS_H_
|