cld3 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +18 -0
  3. data/LICENSE +204 -0
  4. data/LICENSE_CLD3 +203 -0
  5. data/README.md +22 -0
  6. data/cld3.gemspec +35 -0
  7. data/ext/cld3/base.cc +36 -0
  8. data/ext/cld3/base.h +106 -0
  9. data/ext/cld3/casts.h +98 -0
  10. data/ext/cld3/embedding_feature_extractor.cc +51 -0
  11. data/ext/cld3/embedding_feature_extractor.h +182 -0
  12. data/ext/cld3/embedding_network.cc +196 -0
  13. data/ext/cld3/embedding_network.h +186 -0
  14. data/ext/cld3/embedding_network_params.h +285 -0
  15. data/ext/cld3/extconf.rb +49 -0
  16. data/ext/cld3/feature_extractor.cc +137 -0
  17. data/ext/cld3/feature_extractor.h +633 -0
  18. data/ext/cld3/feature_extractor.proto +50 -0
  19. data/ext/cld3/feature_types.cc +72 -0
  20. data/ext/cld3/feature_types.h +158 -0
  21. data/ext/cld3/fixunicodevalue.cc +55 -0
  22. data/ext/cld3/fixunicodevalue.h +69 -0
  23. data/ext/cld3/float16.h +58 -0
  24. data/ext/cld3/fml_parser.cc +308 -0
  25. data/ext/cld3/fml_parser.h +123 -0
  26. data/ext/cld3/generated_entities.cc +296 -0
  27. data/ext/cld3/generated_ulscript.cc +678 -0
  28. data/ext/cld3/generated_ulscript.h +142 -0
  29. data/ext/cld3/getonescriptspan.cc +1109 -0
  30. data/ext/cld3/getonescriptspan.h +124 -0
  31. data/ext/cld3/integral_types.h +37 -0
  32. data/ext/cld3/lang_id_nn_params.cc +57449 -0
  33. data/ext/cld3/lang_id_nn_params.h +178 -0
  34. data/ext/cld3/language_identifier_features.cc +165 -0
  35. data/ext/cld3/language_identifier_features.h +116 -0
  36. data/ext/cld3/nnet_language_identifier.cc +380 -0
  37. data/ext/cld3/nnet_language_identifier.h +175 -0
  38. data/ext/cld3/nnet_language_identifier_c.cc +72 -0
  39. data/ext/cld3/offsetmap.cc +478 -0
  40. data/ext/cld3/offsetmap.h +168 -0
  41. data/ext/cld3/port.h +143 -0
  42. data/ext/cld3/registry.cc +28 -0
  43. data/ext/cld3/registry.h +242 -0
  44. data/ext/cld3/relevant_script_feature.cc +89 -0
  45. data/ext/cld3/relevant_script_feature.h +49 -0
  46. data/ext/cld3/script_detector.h +156 -0
  47. data/ext/cld3/sentence.proto +77 -0
  48. data/ext/cld3/sentence_features.cc +29 -0
  49. data/ext/cld3/sentence_features.h +35 -0
  50. data/ext/cld3/simple_adder.h +72 -0
  51. data/ext/cld3/stringpiece.h +81 -0
  52. data/ext/cld3/task_context.cc +161 -0
  53. data/ext/cld3/task_context.h +81 -0
  54. data/ext/cld3/task_context_params.cc +74 -0
  55. data/ext/cld3/task_context_params.h +54 -0
  56. data/ext/cld3/task_spec.proto +98 -0
  57. data/ext/cld3/text_processing.cc +245 -0
  58. data/ext/cld3/text_processing.h +30 -0
  59. data/ext/cld3/unicodetext.cc +96 -0
  60. data/ext/cld3/unicodetext.h +144 -0
  61. data/ext/cld3/utf8acceptinterchange.h +486 -0
  62. data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
  63. data/ext/cld3/utf8repl_lettermarklower.h +758 -0
  64. data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
  65. data/ext/cld3/utf8statetable.cc +1344 -0
  66. data/ext/cld3/utf8statetable.h +285 -0
  67. data/ext/cld3/utils.cc +241 -0
  68. data/ext/cld3/utils.h +144 -0
  69. data/ext/cld3/workspace.cc +64 -0
  70. data/ext/cld3/workspace.h +177 -0
  71. data/lib/cld3.rb +99 -0
  72. metadata +158 -0
@@ -0,0 +1,89 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #include "relevant_script_feature.h"
17
+
18
+ #include <ctype.h>
19
+
20
+ #include <string>
21
+
22
+ #include "feature_extractor.h"
23
+ #include "feature_types.h"
24
+ #include "language_identifier_features.h"
25
+ #include "script_detector.h"
26
+ #include "cld_3/protos/sentence.pb.h"
27
+ #include "sentence_features.h"
28
+ #include "task_context.h"
29
+ #include "utils.h"
30
+ #include "workspace.h"
31
+
32
+ namespace chrome_lang_id {
33
+ void RelevantScriptFeature::Setup(TaskContext *context) {
34
+ // Nothing.
35
+ }
36
+
37
+ void RelevantScriptFeature::Init(TaskContext *context) {
38
+ set_feature_type(new NumericFeatureType(name(), kNumRelevantScripts));
39
+ }
40
+
41
+ void RelevantScriptFeature::Evaluate(const WorkspaceSet &workspaces,
42
+ const Sentence &sentence,
43
+ FeatureVector *result) const {
44
+ const string &text = sentence.text();
45
+
46
+ // We expect kNumRelevantScripts to be small, so we stack-allocate the array
47
+ // of counts. Still, if that changes, we want to find out.
48
+ static_assert(
49
+ kNumRelevantScripts < 25,
50
+ "switch counts to vector<int>: too big for stack-allocated int[]");
51
+
52
+ // counts[s] is the number of characters with script s.
53
+ // Note: {} "value-initializes" the array to zero.
54
+ int counts[kNumRelevantScripts]{};
55
+ int total_count = 0;
56
+ const char *const text_end = text.data() + text.size();
57
+ for (const char *curr = text.data(); curr < text_end;
58
+ curr += utils::OneCharLen(curr)) {
59
+ const int num_bytes = utils::OneCharLen(curr);
60
+
61
+ // If a partial UTF-8 character is encountered, break out of the loop.
62
+ if (curr + num_bytes > text_end) {
63
+ break;
64
+ }
65
+
66
+ // Skip spaces, numbers, punctuation, and all other non-alpha ASCII
67
+ // characters: these characters are used in so many languages, they do not
68
+ // communicate language-related information.
69
+ if ((num_bytes == 1) && !isalpha(*curr)) {
70
+ continue;
71
+ }
72
+ Script script = GetScript(curr, num_bytes);
73
+ CLD3_DCHECK(script >= 0);
74
+ CLD3_DCHECK(script < kNumRelevantScripts);
75
+ counts[static_cast<int>(script)]++;
76
+ total_count++;
77
+ }
78
+
79
+ for (int script_id = 0; script_id < kNumRelevantScripts; ++script_id) {
80
+ int count = counts[script_id];
81
+ if (count > 0) {
82
+ const float weight = static_cast<float>(count) / total_count;
83
+ FloatFeatureValue value(script_id, weight);
84
+ result->add(feature_type(), value.discrete_value);
85
+ }
86
+ }
87
+ }
88
+
89
+ } // namespace chrome_lang_id
@@ -0,0 +1,49 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef RELEVANT_SCRIPT_FEATURE_H_
17
+ #define RELEVANT_SCRIPT_FEATURE_H_
18
+
19
+ #include "feature_extractor.h"
20
+ #include "cld_3/protos/sentence.pb.h"
21
+ #include "sentence_features.h"
22
+ #include "task_context.h"
23
+ #include "workspace.h"
24
+
25
+ namespace chrome_lang_id {
26
+
27
+ // Given a sentence, generates one FloatFeatureValue for each "relevant" Unicode
28
+ // script (see below): each such feature indicates the script and the ratio of
29
+ // UTF8 characters in that script, in the given sentence.
30
+ //
31
+ // What is a relevant script? Recognizing all 100+ Unicode scripts would
32
+ // require too much code size and runtime. Instead, we focus only on a few
33
+ // scripts that communicate a lot of language information: e.g., the use of
34
+ // Hiragana characters almost always indicates Japanese, so Hiragana is a
35
+ // "relevant" script for us. The Latin script is used by dozens of languages, so
36
+ // Latin is not relevant in this context.
37
+ class RelevantScriptFeature : public WholeSentenceFeature {
38
+ public:
39
+ void Setup(TaskContext *context) override;  // Does nothing for this feature.
40
+ void Init(TaskContext *context) override;  // Installs a NumericFeatureType sized by kNumRelevantScripts.
41
+
42
+ // Appends to |result| one feature per relevant script that occurs in the sentence text; |workspaces| is not used.
43
+ void Evaluate(const WorkspaceSet &workspaces, const Sentence &sentence,
44
+ FeatureVector *result) const override;
45
+ };
46
+
47
+ } // namespace chrome_lang_id
48
+
49
+ #endif // RELEVANT_SCRIPT_FEATURE_H_
@@ -0,0 +1,156 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef SCRIPT_DETECTOR_H_
17
+ #define SCRIPT_DETECTOR_H_
18
+
19
namespace chrome_lang_id {

// The Unicode scripts this library tells apart. To keep the code compact and
// fast, only a handful of scripts that strongly hint at the language of the
// text are detected (e.g., Hiragana -> Japanese).
enum Script {
  // Reports an internal error in the script detection code.
  kScriptError = 0,

  // Buckets for every Unicode script that is not detected individually. There
  // is one bucket per UTF-8 encoded length (1, 2, 3 or 4 bytes), since that
  // length is already known. kScriptOtherUtf8OneByte means ~Latin and
  // kScriptOtherUtf8FourBytes means ~Han.
  kScriptOtherUtf8OneByte,
  kScriptOtherUtf8TwoBytes,
  kScriptOtherUtf8ThreeBytes,
  kScriptOtherUtf8FourBytes,

  kScriptGreek,
  kScriptCyrillic,
  kScriptHebrew,
  kScriptArabic,
  kScriptHangulJamo,  // Used primarily for Korean.
  kScriptHiragana,    // Used primarily for Japanese.
  kScriptKatakana,    // Used primarily for Japanese.

  // Add new scripts here.

  // Must remain the last enumerator: its value is the number of scripts above
  // it, which makes it easy to iterate over them or size arrays.
  kNumRelevantScripts,
};

// Returns true iff low <= value <= hi (both bounds inclusive).
template <typename IntType>
inline bool InRange(IntType value, IntType low, IntType hi) {
  if (!(value >= low)) {
    return false;
  }
  return value <= hi;
}

// Returns the Script of the UTF8 character that starts at address p.
// Precondition: p points to a valid UTF8 character of num_bytes bytes.
// Any num_bytes outside 1..4 yields kScriptError.
inline Script GetScript(const unsigned char *p, int num_bytes) {
  if (num_bytes == 1) {
    return kScriptOtherUtf8OneByte;
  }

  if (num_bytes == 2) {
    // A 2-byte UTF8 character carries 11 bits of payload, which always fits
    // in an unsigned int (at least 16 bits wide).
    constexpr unsigned int kGreekStart = 0x370;
    constexpr unsigned int kGreekEnd = 0x3FF;
    constexpr unsigned int kCyrillicStart = 0x400;
    constexpr unsigned int kCyrillicEnd = 0x4FF;
    constexpr unsigned int kHebrewStart = 0x590;
    constexpr unsigned int kHebrewEnd = 0x5FF;
    constexpr unsigned int kArabicStart = 0x600;
    constexpr unsigned int kArabicEnd = 0x6FF;

    const unsigned int cp = ((p[0] & 0x1F) << 6) | (p[1] & 0x3F);
    if (InRange(cp, kGreekStart, kGreekEnd)) {
      return kScriptGreek;
    }
    if (InRange(cp, kCyrillicStart, kCyrillicEnd)) {
      return kScriptCyrillic;
    }
    if (InRange(cp, kHebrewStart, kHebrewEnd)) {
      return kScriptHebrew;
    }
    if (InRange(cp, kArabicStart, kArabicEnd)) {
      return kScriptArabic;
    }
    return kScriptOtherUtf8TwoBytes;
  }

  if (num_bytes == 3) {
    // A 3-byte UTF8 character carries 16 bits of payload; unsigned int has at
    // least 16 bits.
    constexpr unsigned int kHangulJamoStart = 0x1100;
    constexpr unsigned int kHangulJamoEnd = 0x11FF;
    constexpr unsigned int kHiraganaStart = 0x3041;
    constexpr unsigned int kHiraganaEnd = 0x309F;
    constexpr unsigned int kKatakanaStart = 0x30A0;
    constexpr unsigned int kKatakanaEnd = 0x30FF;

    const unsigned int cp =
        ((p[0] & 0x0F) << 12) | ((p[1] & 0x3F) << 6) | (p[2] & 0x3F);
    if (InRange(cp, kHangulJamoStart, kHangulJamoEnd)) {
      return kScriptHangulJamo;
    }
    if (InRange(cp, kHiraganaStart, kHiraganaEnd)) {
      return kScriptHiragana;
    }
    if (InRange(cp, kKatakanaStart, kKatakanaEnd)) {
      return kScriptKatakana;
    }
    return kScriptOtherUtf8ThreeBytes;
  }

  if (num_bytes == 4) {
    return kScriptOtherUtf8FourBytes;
  }

  return kScriptError;
}

// Same as the overload above, but for "char" pointers. Whether plain char is
// signed depends on the platform, so the bytes are reinterpreted as unsigned
// char before any bit manipulation takes place.
inline Script GetScript(const char *p, int num_bytes) {
  const unsigned char *const bytes =
      reinterpret_cast<const unsigned char *>(p);
  return GetScript(bytes, num_bytes);
}

}  // namespace chrome_lang_id
155
+
156
+ #endif // SCRIPT_DETECTOR_H_
@@ -0,0 +1,77 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ // Protocol buffer specification for sentence analysis.
17
+
18
+ syntax = "proto2";
19
+ option optimize_for = LITE_RUNTIME;
20
+
21
+ package chrome_lang_id;
22
+
23
+ // A Sentence holds the raw text contents of a sentence, together with its
24
+ // analysis (in this message: an identifier and the tokenization).
25
+ message Sentence {
26
+ // Identifier for the sentence.
27
+ optional string id = 1;
28
+
29
+ // Raw text contents of the sentence.
30
+ optional string text = 2;
31
+
32
+ // Tokenization of the sentence: zero or more Token messages.
33
+ repeated Token token = 3;
34
+
35
+ extensions 1000 to max;  // Field numbers from 1000 upwards are available to extensions.
36
+ }
37
+
38
+ // A sentence token marks a span of bytes in the sentence text as a token
39
+ // or word.
40
+ message Token {
41
+ // Word form of the token.
42
+ required string word = 1;
43
+
44
+ // Start position of the token in the text (presumably a byte index, like `end` below; confirm).
45
+ required int32 start = 2;
46
+
47
+ // End position of the token in the text. This is the index of the last byte,
48
+ // not one past the last byte. If the token came from a lexer, excludes any trailing HTML tags.
49
+ required int32 end = 3;
50
+
51
+ // Head of this token in the dependency tree: the id of the token which has an
52
+ // arc going to this one. If it is the root token of a sentence, then it is
53
+ // set to -1, which is also the default value.
54
+ optional int32 head = 4 [default = -1];
55
+
56
+ // Part-of-speech tag for the token.
57
+ optional string tag = 5;
58
+
59
+ // Coarse-grained word category for the token.
60
+ optional string category = 6;
61
+
62
+ // Label for the dependency relation between this token and its head.
63
+ optional string label = 7;
64
+
65
+ // Break level of a token: indicates how the token was separated from the
66
+ // previous token in the text.
67
+ enum BreakLevel {
68
+ NO_BREAK = 0; // No separation between tokens.
69
+ SPACE_BREAK = 1; // Tokens separated by space.
70
+ LINE_BREAK = 2; // Tokens separated by line break.
71
+ SENTENCE_BREAK = 3; // Tokens separated by sentence break.
72
+ }
73
+
74
+ optional BreakLevel break_level = 8 [default = SPACE_BREAK];  // Defaults to SPACE_BREAK when unset.
75
+
76
+ extensions 1000 to max;  // Field numbers from 1000 upwards are available to extensions.
77
+ }
@@ -0,0 +1,29 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #include "sentence_features.h"
17
+
18
+ #include "registry.h"
19
+
20
+ namespace chrome_lang_id {
21
+
22
+ // Defines the registry pointer for the whole-Sentence feature functions, as an
23
+ // explicit specialization of the static member registry_. NOTE: it starts out as nullptr, i.e., it is not
24
+ // yet set to anything meaningful. It is expected to be set in the NNetLanguageIdentifier constructor, *before* any feature is used.
25
+ template <>
26
+ WholeSentenceFeature::Registry
27
+ *RegisterableClass<WholeSentenceFeature>::registry_ = nullptr;
28
+
29
+ } // namespace chrome_lang_id
@@ -0,0 +1,35 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ // Features that operate on Sentence objects. Most features are defined
17
+ // in this header so they may be re-used via composition into other more
18
+ // advanced feature classes.
19
+
20
+ #ifndef SENTENCE_FEATURES_H_
21
+ #define SENTENCE_FEATURES_H_
22
+
23
+ #include "feature_extractor.h"
24
+ #include "cld_3/protos/sentence.pb.h"
25
+
26
+ namespace chrome_lang_id {
27
+
28
+ // Feature function that extracts features for the full Sentence.
29
+ typedef FeatureFunction<Sentence> WholeSentenceFeature;
30
+
31
+ typedef FeatureExtractor<Sentence> WholeSentenceExtractor;
32
+
33
+ } // namespace chrome_lang_id
34
+
35
+ #endif // SENTENCE_FEATURES_H_
@@ -0,0 +1,72 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef SIMPLE_ADDER_H_
17
+ #define SIMPLE_ADDER_H_
18
+
19
+ #include "base.h"
20
+
21
+ namespace chrome_lang_id {
22
+
23
+ // Class for adding (possibly) scaled arrays.
24
+ class SimpleAdder {
25
+ public:
26
+ static constexpr const int kNumFloatsPerBatch = 1;
27
+
28
+ CLD3_ATTRIBUTE_ALWAYS_INLINE SimpleAdder(float *dest, int num_floats)
29
+ : dest_(dest), num_floats_(num_floats) {}
30
+
31
+ CLD3_ATTRIBUTE_ALWAYS_INLINE ~SimpleAdder() {
32
+ // Should call Finalize function before destruction.
33
+ CLD3_DCHECK(dest_ == nullptr);
34
+ }
35
+
36
+ // Caller must call this function before calling deconstruct this object.
37
+ CLD3_ATTRIBUTE_ALWAYS_INLINE void Finalize() { dest_ = nullptr; }
38
+
39
+ CLD3_ATTRIBUTE_ALWAYS_INLINE void LazyAdd(const float *source) const {
40
+ AddImpl(source, num_floats_, dest_);
41
+ }
42
+
43
+ CLD3_ATTRIBUTE_ALWAYS_INLINE void LazyScaleAdd(const float *source,
44
+ const float scale) const {
45
+ ScaleAddImpl(source, num_floats_, scale, dest_);
46
+ }
47
+
48
+ // Simple fast while loop to implement dest += source.
49
+ CLD3_ATTRIBUTE_ALWAYS_INLINE static void AddImpl(
50
+ const float *__restrict source, uint32 size, float *__restrict dest) {
51
+ for (uint32 i = 0; i < size; ++i) {
52
+ dest[i] += source[i];
53
+ }
54
+ }
55
+
56
+ // Simple fast while loop to implement dest += scale * source.
57
+ CLD3_ATTRIBUTE_ALWAYS_INLINE static void ScaleAddImpl(
58
+ const float *__restrict source, uint32 size, const float scale,
59
+ float *__restrict dest) {
60
+ for (uint32 i = 0; i < size; ++i) {
61
+ dest[i] += source[i] * scale;
62
+ }
63
+ }
64
+
65
+ private:
66
+ float *dest_;
67
+ int num_floats_;
68
+ };
69
+
70
+ } // namespace chrome_lang_id
71
+
72
+ #endif // SIMPLE_ADDER_H_