cld3 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +18 -0
- data/LICENSE +204 -0
- data/LICENSE_CLD3 +203 -0
- data/README.md +22 -0
- data/cld3.gemspec +35 -0
- data/ext/cld3/base.cc +36 -0
- data/ext/cld3/base.h +106 -0
- data/ext/cld3/casts.h +98 -0
- data/ext/cld3/embedding_feature_extractor.cc +51 -0
- data/ext/cld3/embedding_feature_extractor.h +182 -0
- data/ext/cld3/embedding_network.cc +196 -0
- data/ext/cld3/embedding_network.h +186 -0
- data/ext/cld3/embedding_network_params.h +285 -0
- data/ext/cld3/extconf.rb +49 -0
- data/ext/cld3/feature_extractor.cc +137 -0
- data/ext/cld3/feature_extractor.h +633 -0
- data/ext/cld3/feature_extractor.proto +50 -0
- data/ext/cld3/feature_types.cc +72 -0
- data/ext/cld3/feature_types.h +158 -0
- data/ext/cld3/fixunicodevalue.cc +55 -0
- data/ext/cld3/fixunicodevalue.h +69 -0
- data/ext/cld3/float16.h +58 -0
- data/ext/cld3/fml_parser.cc +308 -0
- data/ext/cld3/fml_parser.h +123 -0
- data/ext/cld3/generated_entities.cc +296 -0
- data/ext/cld3/generated_ulscript.cc +678 -0
- data/ext/cld3/generated_ulscript.h +142 -0
- data/ext/cld3/getonescriptspan.cc +1109 -0
- data/ext/cld3/getonescriptspan.h +124 -0
- data/ext/cld3/integral_types.h +37 -0
- data/ext/cld3/lang_id_nn_params.cc +57449 -0
- data/ext/cld3/lang_id_nn_params.h +178 -0
- data/ext/cld3/language_identifier_features.cc +165 -0
- data/ext/cld3/language_identifier_features.h +116 -0
- data/ext/cld3/nnet_language_identifier.cc +380 -0
- data/ext/cld3/nnet_language_identifier.h +175 -0
- data/ext/cld3/nnet_language_identifier_c.cc +72 -0
- data/ext/cld3/offsetmap.cc +478 -0
- data/ext/cld3/offsetmap.h +168 -0
- data/ext/cld3/port.h +143 -0
- data/ext/cld3/registry.cc +28 -0
- data/ext/cld3/registry.h +242 -0
- data/ext/cld3/relevant_script_feature.cc +89 -0
- data/ext/cld3/relevant_script_feature.h +49 -0
- data/ext/cld3/script_detector.h +156 -0
- data/ext/cld3/sentence.proto +77 -0
- data/ext/cld3/sentence_features.cc +29 -0
- data/ext/cld3/sentence_features.h +35 -0
- data/ext/cld3/simple_adder.h +72 -0
- data/ext/cld3/stringpiece.h +81 -0
- data/ext/cld3/task_context.cc +161 -0
- data/ext/cld3/task_context.h +81 -0
- data/ext/cld3/task_context_params.cc +74 -0
- data/ext/cld3/task_context_params.h +54 -0
- data/ext/cld3/task_spec.proto +98 -0
- data/ext/cld3/text_processing.cc +245 -0
- data/ext/cld3/text_processing.h +30 -0
- data/ext/cld3/unicodetext.cc +96 -0
- data/ext/cld3/unicodetext.h +144 -0
- data/ext/cld3/utf8acceptinterchange.h +486 -0
- data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
- data/ext/cld3/utf8repl_lettermarklower.h +758 -0
- data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
- data/ext/cld3/utf8statetable.cc +1344 -0
- data/ext/cld3/utf8statetable.h +285 -0
- data/ext/cld3/utils.cc +241 -0
- data/ext/cld3/utils.h +144 -0
- data/ext/cld3/workspace.cc +64 -0
- data/ext/cld3/workspace.h +177 -0
- data/lib/cld3.rb +99 -0
- metadata +158 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
|
15
|
+
//
|
16
|
+
// A StringPiece points to part or all of a string, double-quoted string
|
17
|
+
// literal, or other string-like object. A StringPiece does *not* own the
|
18
|
+
// string to which it points. A StringPiece is not null-terminated. [subset]
|
19
|
+
//
|
20
|
+
|
21
|
+
#ifndef SCRIPT_SPAN_STRINGPIECE_H_
|
22
|
+
#define SCRIPT_SPAN_STRINGPIECE_H_
|
23
|
+
|
24
|
+
#include <string.h>
|
25
|
+
#include <string>
|
26
|
+
|
27
|
+
namespace chrome_lang_id {
|
28
|
+
|
29
|
+
// Signed integer type used for every StringPiece length and offset.
typedef int stringpiece_ssize_type;

// A non-owning (pointer, length) view over a run of characters. The viewed
// bytes are neither copied nor freed by this class, and nothing here
// guarantees NUL termination of the viewed range.
class StringPiece {
 public:
  // The single-argument constructors are deliberately implicit so that a
  // "const char*" or a "string" can be handed to anything that expects a
  // "StringPiece".

  // Builds an empty piece that points at nothing.
  StringPiece() : ptr_(NULL), length_(0) {}

  // Views a NUL-terminated C string. A NULL pointer yields a piece of
  // length zero whose data() is NULL.
  StringPiece(const char* str)  // NOLINT(runtime/explicit)
      : ptr_(str),
        length_(str == NULL
                    ? 0
                    : static_cast<stringpiece_ssize_type>(strlen(str))) {}

  // Views the full contents of |str|, embedded NULs included.
  StringPiece(const std::string& str)  // NOLINT(runtime/explicit)
      : ptr_(str.data()),
        length_(static_cast<stringpiece_ssize_type>(str.size())) {}

  // Views |len| characters starting at |offset|.
  StringPiece(const char* offset, stringpiece_ssize_type len)
      : ptr_(offset), length_(len) {}

  // Drops the first |n| characters from the view. |n| is not range-checked.
  void remove_prefix(stringpiece_ssize_type n) {
    ptr_ = ptr_ + n;
    length_ = length_ - n;
  }

  // Drops the last |n| characters from the view. |n| is not range-checked.
  void remove_suffix(stringpiece_ssize_type n) { length_ = length_ - n; }

  // data() may point into a buffer that holds embedded NULs and that is not
  // NUL terminated, so handing it to a routine that expects a C string is
  // usually a mistake.
  const char* data() const { return ptr_; }

  // Number of characters in the view; length() is a synonym for size().
  stringpiece_ssize_type size() const { return length_; }
  stringpiece_ssize_type length() const { return size(); }

  // True when the view covers zero characters.
  bool empty() const { return size() == 0; }

 private:
  const char* ptr_;                // First viewed character; may be NULL.
  stringpiece_ssize_type length_;  // Number of viewed characters.
};

// Redeclaration of the class defined just above; it has no effect and is
// kept only so the header's contents stay as they were.
class StringPiece;
|
78
|
+
|
79
|
+
} // namespace chrome_lang_id
|
80
|
+
|
81
|
+
#endif // SCRIPT_SPAN_STRINGPIECE_H_
|
@@ -0,0 +1,161 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#include "task_context.h"
|
17
|
+
|
18
|
+
#include "utils.h"
|
19
|
+
|
20
|
+
namespace chrome_lang_id {
|
21
|
+
|
22
|
+
// Creates a context whose members are all default-constructed.
TaskContext::TaskContext() = default;
|
23
|
+
|
24
|
+
// Nothing to release beyond what the members clean up themselves.
TaskContext::~TaskContext() = default;
|
25
|
+
|
26
|
+
// Returns the input descriptor called |name| from the task specification.
// When no input with that name exists yet, a new one is appended to the
// specification, given that name, and returned.
TaskInput *TaskContext::GetInput(const string &name) {
  // Walk forward until a matching name is found or the list is exhausted.
  const int num_inputs = spec_.input_size();
  int index = 0;
  while (index < num_inputs && !(spec_.input(index).name() == name)) {
    ++index;
  }
  if (index < num_inputs) {
    return spec_.mutable_input(index);
  }

  // Nothing matched: register a fresh input under the requested name.
  TaskInput *created = spec_.add_input();
  created->set_name(name);
  return created;
}
|
37
|
+
|
38
|
+
// Returns the input called |name| (created on demand by the single-argument
// overload) and makes sure |file_format| and |record_format| are listed on
// it. An empty format string is ignored; a format that is already listed is
// not added a second time.
TaskInput *TaskContext::GetInput(const string &name, const string &file_format,
                                 const string &record_format) {
  TaskInput *input = GetInput(name);

  if (!file_format.empty()) {
    bool listed = false;
    for (int i = 0; !listed && i < input->file_format_size(); ++i) {
      listed = (input->file_format(i) == file_format);
    }
    if (!listed) input->add_file_format(file_format);
  }

  if (!record_format.empty()) {
    bool listed = false;
    for (int i = 0; !listed && i < input->record_format_size(); ++i) {
      listed = (input->record_format(i) == record_format);
    }
    if (!listed) input->add_record_format(record_format);
  }

  return input;
}
|
57
|
+
|
58
|
+
void TaskContext::SetParameter(const string &name, const string &value) {
|
59
|
+
// If the parameter already exists update the value.
|
60
|
+
for (int i = 0; i < spec_.parameter_size(); ++i) {
|
61
|
+
if (spec_.parameter(i).name() == name) {
|
62
|
+
spec_.mutable_parameter(i)->set_value(value);
|
63
|
+
return;
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
// Add new parameter.
|
68
|
+
TaskSpec::Parameter *param = spec_.add_parameter();
|
69
|
+
param->set_name(name);
|
70
|
+
param->set_value(value);
|
71
|
+
}
|
72
|
+
|
73
|
+
// Returns the value of the first parameter in the task specification whose
// name equals |name|, or an empty string when there is no such parameter.
string TaskContext::GetParameter(const string &name) const {
  const int num_params = spec_.parameter_size();
  for (int idx = 0; idx != num_params; ++idx) {
    const auto &candidate = spec_.parameter(idx);
    if (candidate.name() == name) {
      return candidate.value();
    }
  }

  // Unknown parameter: report it as empty.
  return "";
}
|
82
|
+
|
83
|
+
int TaskContext::GetIntParameter(const string &name) const {
|
84
|
+
string value = GetParameter(name);
|
85
|
+
return utils::ParseUsing<int>(value, 0, utils::ParseInt32);
|
86
|
+
}
|
87
|
+
|
88
|
+
bool TaskContext::GetBoolParameter(const string &name) const {
|
89
|
+
string value = GetParameter(name);
|
90
|
+
return value == "true";
|
91
|
+
}
|
92
|
+
|
93
|
+
double TaskContext::GetFloatParameter(const string &name) const {
|
94
|
+
string value = GetParameter(name);
|
95
|
+
return utils::ParseUsing<double>(value, .0, utils::ParseDouble);
|
96
|
+
}
|
97
|
+
|
98
|
+
// Returns the value of the first parameter in the task specification whose
// name equals |name|. When no such parameter exists, |defval| is returned
// instead; a null |defval| is treated as an empty string.
string TaskContext::Get(const string &name, const char *defval) const {
  // First try to find parameter in task specification.
  for (int i = 0; i < spec_.parameter_size(); ++i) {
    if (spec_.parameter(i).name() == name) return spec_.parameter(i).value();
  }

  // Parameter not found, return default value. Building a std::string from a
  // null pointer is undefined behavior, so map null to the empty string.
  return defval != nullptr ? string(defval) : string();
}
|
107
|
+
|
108
|
+
// String-default flavour of Get(): forwards to the "const char *" overload
// using the C-string form of |defval|.
string TaskContext::Get(const string &name, const string &defval) const {
  const char *fallback = defval.c_str();
  return Get(name, fallback);
}
|
111
|
+
|
112
|
+
int TaskContext::Get(const string &name, int defval) const {
|
113
|
+
string value = Get(name, "");
|
114
|
+
return utils::ParseUsing<int>(value, defval, utils::ParseInt32);
|
115
|
+
}
|
116
|
+
|
117
|
+
double TaskContext::Get(const string &name, double defval) const {
|
118
|
+
string value = Get(name, "");
|
119
|
+
return utils::ParseUsing<double>(value, defval, utils::ParseDouble);
|
120
|
+
}
|
121
|
+
|
122
|
+
bool TaskContext::Get(const string &name, bool defval) const {
|
123
|
+
string value = Get(name, "");
|
124
|
+
return value.empty() ? defval : value == "true";
|
125
|
+
}
|
126
|
+
|
127
|
+
// Returns the file pattern of the only part of |input|. CLD3_CHECK is used
// to require that |input| has exactly one part.
string TaskContext::InputFile(const TaskInput &input) {
  CLD3_CHECK(input.part_size() == 1);
  const auto &only_part = input.part(0);
  return only_part.file_pattern();
}
|
131
|
+
|
132
|
+
bool TaskContext::Supports(const TaskInput &input, const string &file_format,
|
133
|
+
const string &record_format) {
|
134
|
+
// Check file format.
|
135
|
+
if (input.file_format_size() > 0) {
|
136
|
+
bool found = false;
|
137
|
+
for (int i = 0; i < input.file_format_size(); ++i) {
|
138
|
+
if (input.file_format(i) == file_format) {
|
139
|
+
found = true;
|
140
|
+
break;
|
141
|
+
}
|
142
|
+
}
|
143
|
+
if (!found) return false;
|
144
|
+
}
|
145
|
+
|
146
|
+
// Check record format.
|
147
|
+
if (input.record_format_size() > 0) {
|
148
|
+
bool found = false;
|
149
|
+
for (int i = 0; i < input.record_format_size(); ++i) {
|
150
|
+
if (input.record_format(i) == record_format) {
|
151
|
+
found = true;
|
152
|
+
break;
|
153
|
+
}
|
154
|
+
}
|
155
|
+
if (!found) return false;
|
156
|
+
}
|
157
|
+
|
158
|
+
return true;
|
159
|
+
}
|
160
|
+
|
161
|
+
} // namespace chrome_lang_id
|
@@ -0,0 +1,81 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#ifndef TASK_CONTEXT_H_
|
17
|
+
#define TASK_CONTEXT_H_
|
18
|
+
|
19
|
+
#include <string>
|
20
|
+
#include <vector>
|
21
|
+
|
22
|
+
#include "base.h"
|
23
|
+
#include "cld_3/protos/task_spec.pb.h"
|
24
|
+
|
25
|
+
namespace chrome_lang_id {
|
26
|
+
|
27
|
+
// A task context holds configuration information for a task. It is basically a
|
28
|
+
// wrapper around a TaskSpec protocol buffer.
|
29
|
+
class TaskContext {
|
30
|
+
public:
|
31
|
+
TaskContext();
|
32
|
+
~TaskContext();
|
33
|
+
|
34
|
+
// Returns the underlying task specification protocol buffer for the context.
|
35
|
+
const TaskSpec &spec() const { return spec_; }
|
36
|
+
TaskSpec *mutable_spec() { return &spec_; }
|
37
|
+
|
38
|
+
// Returns a named input descriptor for the task. A new input is created if
|
39
|
+
// the task context does not already have an input with that name.
|
40
|
+
TaskInput *GetInput(const string &name);
|
41
|
+
TaskInput *GetInput(const string &name, const string &file_format,
|
42
|
+
const string &record_format);
|
43
|
+
|
44
|
+
// Sets task parameter.
|
45
|
+
void SetParameter(const string &name, const string &value);
|
46
|
+
|
47
|
+
// Returns task parameter. If the parameter is not in the task configuration
|
48
|
+
// the (default) value of the corresponding command line flag is returned.
|
49
|
+
string GetParameter(const string &name) const;
|
50
|
+
int GetIntParameter(const string &name) const;
|
51
|
+
bool GetBoolParameter(const string &name) const;
|
52
|
+
double GetFloatParameter(const string &name) const;
|
53
|
+
|
54
|
+
// Returns task parameter. If the parameter is not in the task configuration
|
55
|
+
// the default value is returned. Parameters retrieved using these methods
|
56
|
+
// don't need to be defined with a DEFINE_*() macro.
|
57
|
+
string Get(const string &name, const string &defval) const;
|
58
|
+
string Get(const string &name, const char *defval) const;
|
59
|
+
int Get(const string &name, int defval) const;
|
60
|
+
double Get(const string &name, double defval) const;
|
61
|
+
bool Get(const string &name, bool defval) const;
|
62
|
+
|
63
|
+
// Returns input file name for a single-file task input.
|
64
|
+
static string InputFile(const TaskInput &input);
|
65
|
+
|
66
|
+
// Returns true if task input supports the file and record format.
|
67
|
+
static bool Supports(const TaskInput &input, const string &file_format,
|
68
|
+
const string &record_format);
|
69
|
+
|
70
|
+
private:
|
71
|
+
// Underlying task specification protocol buffer.
|
72
|
+
TaskSpec spec_;
|
73
|
+
|
74
|
+
// Vector of parameters required by this task. These must be specified in the
|
75
|
+
// task rather than relying on default values.
|
76
|
+
std::vector<string> required_parameters_;
|
77
|
+
};
|
78
|
+
|
79
|
+
} // namespace chrome_lang_id
|
80
|
+
|
81
|
+
#endif // TASK_CONTEXT_H_
|
@@ -0,0 +1,74 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
// This file contains the hard-coded parameters from the training workflow. If
|
17
|
+
// you update the binary model, you may need to update the variables below as
|
18
|
+
// well.
|
19
|
+
|
20
|
+
#include "task_context_params.h"
|
21
|
+
|
22
|
+
#include "task_context.h"
|
23
|
+
|
24
|
+
namespace chrome_lang_id {
|
25
|
+
|
26
|
+
// Copies the three hard-coded language-identifier settings (feature spec,
// embedding names, embedding dimensions) into |context| as task parameters.
void TaskContextParams::ToTaskContext(TaskContext *context) {
  struct Setting {
    const char *key;
    const char *value;
  };
  const Setting settings[] = {
      {"language_identifier_features", kLanguageIdentifierFeatures},
      {"language_identifier_embedding_names",
       kLanguageIdentifierEmbeddingNames},
      {"language_identifier_embedding_dims", kLanguageIdentifierEmbeddingDims},
  };

  // Applied in declaration order, one SetParameter() call per entry.
  for (const Setting &setting : settings) {
    context->SetParameter(setting.key, setting.value);
  }
}
|
34
|
+
|
35
|
+
int TaskContextParams::GetNumLanguages() {
|
36
|
+
int i = 0;
|
37
|
+
while (kLanguageNames[i] != nullptr) {
|
38
|
+
i++;
|
39
|
+
}
|
40
|
+
return i;
|
41
|
+
}
|
42
|
+
|
43
|
+
// Language codes, indexed by language id. The order is significant to
// language_names(i), and the table ends with a nullptr sentinel that
// GetNumLanguages() depends on to find the end.
const char *const TaskContextParams::kLanguageNames[] = {
    "eo", "co", "eu", "ta", "de", "mt", "ps", "te", "su", "uz", "zh-Latn", "ne",
    "nl", "sw", "sq", "hmn", "ja", "no", "mn", "so", "ko", "kk", "sl", "ig",
    "mr", "th", "zu", "ml", "hr", "bs", "lo", "sd", "cy", "hy", "uk", "pt",
    "lv", "iw", "cs", "vi", "jv", "be", "km", "mk", "tr", "fy", "am", "zh",
    "da", "sv", "fi", "ht", "af", "la", "id", "fil", "sm", "ca", "el", "ka",
    "sr", "it", "sk", "ru", "ru-Latn", "bg", "ny", "fa", "haw", "gl", "et",
    "ms", "gd", "bg-Latn", "ha", "is", "ur", "mi", "hi", "bn", "hi-Latn", "fr",
    "yi", "hu", "xh", "my", "tg", "ro", "ar", "lb", "el-Latn", "st", "ceb",
    "kn", "az", "si", "ky", "mg", "en", "gu", "es", "pl", "ja-Latn", "ga", "lt",
    "sn", "yo", "pa", "ku",

    // Sentinel: the final element has to stay nullptr.
    nullptr,
};
|
58
|
+
|
59
|
+
// Feature specification string: six ';'-separated entries. The literal is
// split so that each entry starts on its own line; the concatenated value is
// unchanged.
const char TaskContextParams::kLanguageIdentifierFeatures[] =
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=1000,size=2);"
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=5000,size=4);"
    "continuous-bag-of-relevant-scripts;"
    "script;"
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=5000,size=3);"
    "continuous-bag-of-ngrams(include_terminators=true,include_spaces=false,"
    "use_equal_weight=false,id_dim=100,size=1)";
|
67
|
+
|
68
|
+
// ';'-separated names of the embedding spaces (six entries).
const char TaskContextParams::kLanguageIdentifierEmbeddingNames[] =
    "bigrams;quadgrams;relevant-scripts;text-script;trigrams;unigrams";
|
70
|
+
|
71
|
+
// ';'-separated dimensions of the embedding spaces (six entries).
const char TaskContextParams::kLanguageIdentifierEmbeddingDims[] =
    "16;16;8;8;16;16";
|
73
|
+
|
74
|
+
} // namespace chrome_lang_id
|
@@ -0,0 +1,54 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#ifndef TASK_CONTEXT_PARAMS_H_
|
17
|
+
#define TASK_CONTEXT_PARAMS_H_
|
18
|
+
|
19
|
+
#include <string>
|
20
|
+
|
21
|
+
#include "base.h"
|
22
|
+
#include "task_context.h"
|
23
|
+
|
24
|
+
namespace chrome_lang_id {
|
25
|
+
|
26
|
+
// Encapsulates the TaskContext specifying only the parameters for the model.
|
27
|
+
// The model weights are loaded statically.
|
28
|
+
class TaskContextParams {
|
29
|
+
public:
|
30
|
+
// Gets the name of the i'th language.
|
31
|
+
static const char *language_names(int i) { return kLanguageNames[i]; }
|
32
|
+
|
33
|
+
// Saves the parameters to the given TaskContext.
|
34
|
+
static void ToTaskContext(TaskContext *context);
|
35
|
+
|
36
|
+
// Gets the number of languages.
|
37
|
+
static int GetNumLanguages();
|
38
|
+
|
39
|
+
private:
|
40
|
+
// Names of all the languages.
|
41
|
+
static const char *const kLanguageNames[];
|
42
|
+
|
43
|
+
// Features in FML format.
|
44
|
+
static const char kLanguageIdentifierFeatures[];
|
45
|
+
|
46
|
+
// Names of the embedding spaces.
|
47
|
+
static const char kLanguageIdentifierEmbeddingNames[];
|
48
|
+
|
49
|
+
// Dimensions of the embedding spaces.
|
50
|
+
static const char kLanguageIdentifierEmbeddingDims[];
|
51
|
+
};
|
52
|
+
} // namespace chrome_lang_id
|
53
|
+
|
54
|
+
#endif // TASK_CONTEXT_PARAMS_H_
|