cld3 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +18 -0
  3. data/LICENSE +204 -0
  4. data/LICENSE_CLD3 +203 -0
  5. data/README.md +22 -0
  6. data/cld3.gemspec +35 -0
  7. data/ext/cld3/base.cc +36 -0
  8. data/ext/cld3/base.h +106 -0
  9. data/ext/cld3/casts.h +98 -0
  10. data/ext/cld3/embedding_feature_extractor.cc +51 -0
  11. data/ext/cld3/embedding_feature_extractor.h +182 -0
  12. data/ext/cld3/embedding_network.cc +196 -0
  13. data/ext/cld3/embedding_network.h +186 -0
  14. data/ext/cld3/embedding_network_params.h +285 -0
  15. data/ext/cld3/extconf.rb +49 -0
  16. data/ext/cld3/feature_extractor.cc +137 -0
  17. data/ext/cld3/feature_extractor.h +633 -0
  18. data/ext/cld3/feature_extractor.proto +50 -0
  19. data/ext/cld3/feature_types.cc +72 -0
  20. data/ext/cld3/feature_types.h +158 -0
  21. data/ext/cld3/fixunicodevalue.cc +55 -0
  22. data/ext/cld3/fixunicodevalue.h +69 -0
  23. data/ext/cld3/float16.h +58 -0
  24. data/ext/cld3/fml_parser.cc +308 -0
  25. data/ext/cld3/fml_parser.h +123 -0
  26. data/ext/cld3/generated_entities.cc +296 -0
  27. data/ext/cld3/generated_ulscript.cc +678 -0
  28. data/ext/cld3/generated_ulscript.h +142 -0
  29. data/ext/cld3/getonescriptspan.cc +1109 -0
  30. data/ext/cld3/getonescriptspan.h +124 -0
  31. data/ext/cld3/integral_types.h +37 -0
  32. data/ext/cld3/lang_id_nn_params.cc +57449 -0
  33. data/ext/cld3/lang_id_nn_params.h +178 -0
  34. data/ext/cld3/language_identifier_features.cc +165 -0
  35. data/ext/cld3/language_identifier_features.h +116 -0
  36. data/ext/cld3/nnet_language_identifier.cc +380 -0
  37. data/ext/cld3/nnet_language_identifier.h +175 -0
  38. data/ext/cld3/nnet_language_identifier_c.cc +72 -0
  39. data/ext/cld3/offsetmap.cc +478 -0
  40. data/ext/cld3/offsetmap.h +168 -0
  41. data/ext/cld3/port.h +143 -0
  42. data/ext/cld3/registry.cc +28 -0
  43. data/ext/cld3/registry.h +242 -0
  44. data/ext/cld3/relevant_script_feature.cc +89 -0
  45. data/ext/cld3/relevant_script_feature.h +49 -0
  46. data/ext/cld3/script_detector.h +156 -0
  47. data/ext/cld3/sentence.proto +77 -0
  48. data/ext/cld3/sentence_features.cc +29 -0
  49. data/ext/cld3/sentence_features.h +35 -0
  50. data/ext/cld3/simple_adder.h +72 -0
  51. data/ext/cld3/stringpiece.h +81 -0
  52. data/ext/cld3/task_context.cc +161 -0
  53. data/ext/cld3/task_context.h +81 -0
  54. data/ext/cld3/task_context_params.cc +74 -0
  55. data/ext/cld3/task_context_params.h +54 -0
  56. data/ext/cld3/task_spec.proto +98 -0
  57. data/ext/cld3/text_processing.cc +245 -0
  58. data/ext/cld3/text_processing.h +30 -0
  59. data/ext/cld3/unicodetext.cc +96 -0
  60. data/ext/cld3/unicodetext.h +144 -0
  61. data/ext/cld3/utf8acceptinterchange.h +486 -0
  62. data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
  63. data/ext/cld3/utf8repl_lettermarklower.h +758 -0
  64. data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
  65. data/ext/cld3/utf8statetable.cc +1344 -0
  66. data/ext/cld3/utf8statetable.h +285 -0
  67. data/ext/cld3/utils.cc +241 -0
  68. data/ext/cld3/utils.h +144 -0
  69. data/ext/cld3/workspace.cc +64 -0
  70. data/ext/cld3/workspace.h +177 -0
  71. data/lib/cld3.rb +99 -0
  72. metadata +158 -0
@@ -0,0 +1,89 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #include "relevant_script_feature.h"
17
+
18
+ #include <ctype.h>
19
+
20
+ #include <string>
21
+
22
+ #include "feature_extractor.h"
23
+ #include "feature_types.h"
24
+ #include "language_identifier_features.h"
25
+ #include "script_detector.h"
26
+ #include "cld_3/protos/sentence.pb.h"
27
+ #include "sentence_features.h"
28
+ #include "task_context.h"
29
+ #include "utils.h"
30
+ #include "workspace.h"
31
+
32
+ namespace chrome_lang_id {
33
+ void RelevantScriptFeature::Setup(TaskContext *context) {
34
+ // Nothing.
35
+ }
36
+
37
+ void RelevantScriptFeature::Init(TaskContext *context) {
38
+ set_feature_type(new NumericFeatureType(name(), kNumRelevantScripts));
39
+ }
40
+
41
+ void RelevantScriptFeature::Evaluate(const WorkspaceSet &workspaces,
42
+ const Sentence &sentence,
43
+ FeatureVector *result) const {
44
+ const string &text = sentence.text();
45
+
46
+ // We expect kNumRelevantScripts to be small, so we stack-allocate the array
47
+ // of counts. Still, if that changes, we want to find out.
48
+ static_assert(
49
+ kNumRelevantScripts < 25,
50
+ "switch counts to vector<int>: too big for stack-allocated int[]");
51
+
52
+ // counts[s] is the number of characters with script s.
53
+ // Note: {} "value-initializes" the array to zero.
54
+ int counts[kNumRelevantScripts]{};
55
+ int total_count = 0;
56
+ const char *const text_end = text.data() + text.size();
57
+ for (const char *curr = text.data(); curr < text_end;
58
+ curr += utils::OneCharLen(curr)) {
59
+ const int num_bytes = utils::OneCharLen(curr);
60
+
61
+ // If a partial UTF-8 character is encountered, break out of the loop.
62
+ if (curr + num_bytes > text_end) {
63
+ break;
64
+ }
65
+
66
+ // Skip spaces, numbers, punctuation, and all other non-alpha ASCII
67
+ // characters: these characters are used in so many languages, they do not
68
+ // communicate language-related information.
69
+ if ((num_bytes == 1) && !isalpha(*curr)) {
70
+ continue;
71
+ }
72
+ Script script = GetScript(curr, num_bytes);
73
+ CLD3_DCHECK(script >= 0);
74
+ CLD3_DCHECK(script < kNumRelevantScripts);
75
+ counts[static_cast<int>(script)]++;
76
+ total_count++;
77
+ }
78
+
79
+ for (int script_id = 0; script_id < kNumRelevantScripts; ++script_id) {
80
+ int count = counts[script_id];
81
+ if (count > 0) {
82
+ const float weight = static_cast<float>(count) / total_count;
83
+ FloatFeatureValue value(script_id, weight);
84
+ result->add(feature_type(), value.discrete_value);
85
+ }
86
+ }
87
+ }
88
+
89
+ } // namespace chrome_lang_id
@@ -0,0 +1,49 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef RELEVANT_SCRIPT_FEATURE_H_
17
+ #define RELEVANT_SCRIPT_FEATURE_H_
18
+
19
+ #include "feature_extractor.h"
20
+ #include "cld_3/protos/sentence.pb.h"
21
+ #include "sentence_features.h"
22
+ #include "task_context.h"
23
+ #include "workspace.h"
24
+
25
+ namespace chrome_lang_id {
26
+
27
+ // Given a sentence, generates one FloatFeatureValue for each "relevant" Unicode
28
+ // script (see below): each such feature indicates the script and the ratio of
29
+ // UTF8 characters in that script, in the given sentence.
30
+ //
31
+ // What is a relevant script? Recognizing all 100+ Unicode scripts would
32
+ // require too much code size and runtime. Instead, we focus only on a few
33
+ // scripts that communicate a lot of language information: e.g., the use of
34
+ // Hiragana characters almost always indicates Japanese, so Hiragana is a
35
+ // "relevant" script for us. The Latin script is used by dozens of language, so
36
+ // Latin is not relevant in this context.
37
+ class RelevantScriptFeature : public WholeSentenceFeature {
38
+ public:
39
+ void Setup(TaskContext *context) override;
40
+ void Init(TaskContext *context) override;
41
+
42
+ // Appends the features computed from the sentence to the feature vector.
43
+ void Evaluate(const WorkspaceSet &workspaces, const Sentence &sentence,
44
+ FeatureVector *result) const override;
45
+ };
46
+
47
+ } // namespace chrome_lang_id
48
+
49
+ #endif // RELEVANT_SCRIPT_FEATURE_H_
@@ -0,0 +1,156 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef SCRIPT_DETECTOR_H_
17
+ #define SCRIPT_DETECTOR_H_
18
+
19
+ namespace chrome_lang_id {
20
+
21
// The Unicode scripts this library distinguishes.  To keep the code compact
// and fast, only a few scripts that give a strong hint about the language of
// the text are detected (e.g., Hiragana -> Japanese).
enum Script {
  // Reserved for internal errors in the script detection code.
  kScriptError = 0,

  // Catch-all values for every Unicode script that is not detected
  // individually.  There is one value per UTF8 encoding length (1, 2, 3 and 4
  // bytes), because that information is already at hand.
  // kScriptOtherUtf8OneByte means ~Latin and kScriptOtherUtf8FourBytes means
  // ~Han.
  kScriptOtherUtf8OneByte,
  kScriptOtherUtf8TwoBytes,
  kScriptOtherUtf8ThreeBytes,
  kScriptOtherUtf8FourBytes,

  // Individually detected scripts.
  kScriptGreek,
  kScriptCyrillic,
  kScriptHebrew,
  kScriptArabic,
  kScriptHangulJamo,  // Used primarily for Korean.
  kScriptHiragana,    // Used primarily for Japanese.
  kScriptKatakana,    // Used primarily for Japanese.

  // Add new scripts here.

  // Must stay last: do not add any script after kNumRelevantScripts.  Its
  // value equals the number of enumerators above it, which makes it easy to
  // iterate over all scripts.
  kNumRelevantScripts,
};
52
+
53
// Returns true iff value lies in the closed interval [low, hi].
template <typename IntType>
inline bool InRange(IntType value, IntType low, IntType hi) {
  if (!(value >= low)) {
    return false;
  }
  return value <= hi;
}
57
+
58
+ // Returns Script for the UTF8 character that starts at address p.
59
+ // Precondition: p points to a valid UTF8 character of num_bytes bytes.
60
+ inline Script GetScript(const unsigned char *p, int num_bytes) {
61
+ switch (num_bytes) {
62
+ case 1:
63
+ return kScriptOtherUtf8OneByte;
64
+
65
+ case 2: {
66
+ // 2-byte UTF8 characters have 11 bits of information. unsigned int has
67
+ // at least 16 bits (http://en.cppreference.com/w/cpp/language/types) so
68
+ // it's enough. It's also usually the fastest int type on the current
69
+ // CPU, so it's better to use than int32.
70
+ static const unsigned int kGreekStart = 0x370;
71
+
72
+ // Commented out (unsued in the code): kGreekEnd = 0x3FF;
73
+ static const unsigned int kCyrillicStart = 0x400;
74
+ static const unsigned int kCyrillicEnd = 0x4FF;
75
+ static const unsigned int kHebrewStart = 0x590;
76
+
77
+ // Commented out (unsued in the code): kHebrewEnd = 0x5FF;
78
+ static const unsigned int kArabicStart = 0x600;
79
+ static const unsigned int kArabicEnd = 0x6FF;
80
+ const unsigned int codepoint = ((p[0] & 0x1F) << 6) | (p[1] & 0x3F);
81
+ if (codepoint > kCyrillicEnd) {
82
+ if (codepoint >= kArabicStart) {
83
+ if (codepoint <= kArabicEnd) {
84
+ return kScriptArabic;
85
+ }
86
+ } else {
87
+ // At this point, codepoint < kArabicStart = kHebrewEnd + 1, so
88
+ // codepoint <= kHebrewEnd.
89
+ if (codepoint >= kHebrewStart) {
90
+ return kScriptHebrew;
91
+ }
92
+ }
93
+ } else {
94
+ if (codepoint >= kCyrillicStart) {
95
+ return kScriptCyrillic;
96
+ } else {
97
+ // At this point, codepoint < kCyrillicStart = kGreekEnd + 1, so
98
+ // codepoint <= kGreekEnd.
99
+ if (codepoint >= kGreekStart) {
100
+ return kScriptGreek;
101
+ }
102
+ }
103
+ }
104
+ return kScriptOtherUtf8TwoBytes;
105
+ }
106
+
107
+ case 3: {
108
+ // 3-byte UTF8 characters have 16 bits of information. unsigned int has
109
+ // at least 16 bits.
110
+ static const unsigned int kHangulJamoStart = 0x1100;
111
+ static const unsigned int kHangulJamoEnd = 0x11FF;
112
+ static const unsigned int kHiraganaStart = 0x3041;
113
+ static const unsigned int kHiraganaEnd = 0x309F;
114
+
115
+ // Commented out (unsued in the code): kKatakanaStart = 0x30A0;
116
+ static const unsigned int kKatakanaEnd = 0x30FF;
117
+ const unsigned int codepoint =
118
+ ((p[0] & 0x0F) << 12) | ((p[1] & 0x3F) << 6) | (p[2] & 0x3F);
119
+ if (codepoint > kHiraganaEnd) {
120
+ // On this branch, codepoint > kHiraganaEnd = kKatakanaStart - 1, so
121
+ // codepoint >= kKatakanaStart.
122
+ if (codepoint <= kKatakanaEnd) {
123
+ return kScriptKatakana;
124
+ }
125
+ } else {
126
+ if (codepoint >= kHiraganaStart) {
127
+ return kScriptHiragana;
128
+ } else {
129
+ if (InRange(codepoint, kHangulJamoStart, kHangulJamoEnd)) {
130
+ return kScriptHangulJamo;
131
+ }
132
+ }
133
+ }
134
+ return kScriptOtherUtf8ThreeBytes;
135
+ }
136
+
137
+ case 4:
138
+ return kScriptOtherUtf8FourBytes;
139
+
140
+ default:
141
+ return kScriptError;
142
+ }
143
+ }
144
+
145
+ // Returns Script for the UTF8 character that starts at address p. Similar to
146
+ // the previous version of GetScript, except for "char" vs "unsigned char".
147
+ // Most code works with "char *" pointers, ignoring the fact that char is
148
+ // unsigned (by default) on most platforms, but signed on iOS. This code takes
149
+ // care of making sure we always treat chars as unsigned.
150
+ inline Script GetScript(const char *p, int num_bytes) {
151
+ return GetScript(reinterpret_cast<const unsigned char *>(p), num_bytes);
152
+ }
153
+
154
+ } // namespace chrome_lang_id
155
+
156
+ #endif // SCRIPT_DETECTOR_H_
@@ -0,0 +1,77 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ // Protocol buffer specification for sentence analysis.
17
+
18
+ syntax = "proto2";
19
+ option optimize_for = LITE_RUNTIME;
20
+
21
+ package chrome_lang_id;
22
+
23
+ // A Sentence holds the raw text contents of a sentence, together with an
24
+ // analysis of that text (its tokenization).
25
+ message Sentence {
26
+ // Identifier for the sentence.
27
+ optional string id = 1;
28
+
29
+ // Raw text contents of the sentence.
30
+ optional string text = 2;
31
+
32
+ // Tokenization of the sentence: one Token entry per token.
33
+ repeated Token token = 3;
34
+
35
+ extensions 1000 to max;
36
+ }
37
+
38
+ // A sentence token marks a span of bytes in the sentence text as a token
39
+ // or word.
40
+ message Token {
41
+ // Token word form.
42
+ required string word = 1;
43
+
44
+ // Start position of the token in the text.
45
+ required int32 start = 2;
46
+
47
+ // End position of the token in the text. Gives the index of the last byte,
48
+ // not one past it. If the token came from lexer, excludes trailing HTML tags.
49
+ required int32 end = 3;
50
+
51
+ // Head of this token in the dependency tree: the id of the token which has an
52
+ // arc going to this one. If it is the root token of a sentence, then it is
53
+ // set to -1 (which is also the default).
54
+ optional int32 head = 4 [default = -1];
55
+
56
+ // Part-of-speech tag for token.
57
+ optional string tag = 5;
58
+
59
+ // Coarse-grained word category for token.
60
+ optional string category = 6;
61
+
62
+ // Label for dependency relation between this token and its head.
63
+ optional string label = 7;
64
+
65
+ // Break level of a token: indicates how the token was separated from the
66
+ // previous token in the text.
67
+ enum BreakLevel {
68
+ NO_BREAK = 0; // No separation between tokens.
69
+ SPACE_BREAK = 1; // Tokens separated by space.
70
+ LINE_BREAK = 2; // Tokens separated by line break.
71
+ SENTENCE_BREAK = 3; // Tokens separated by sentence break.
72
+ }
73
+
74
+ optional BreakLevel break_level = 8 [default = SPACE_BREAK];
75
+
76
+ extensions 1000 to max;
77
+ }
@@ -0,0 +1,29 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #include "sentence_features.h"
17
+
18
+ #include "registry.h"
19
+
20
+ namespace chrome_lang_id {
21
+
22
+ // Defines the registry pointer for the whole-Sentence feature functions. NOTE:
23
+ // it is only initialized to nullptr here. Per the original authors, it is set
24
+ // in the NNetLanguageIdentifier constructor, *before* any feature is used.
25
+ template <>
26
+ WholeSentenceFeature::Registry
27
+ *RegisterableClass<WholeSentenceFeature>::registry_ = nullptr;
28
+
29
+ } // namespace chrome_lang_id
@@ -0,0 +1,35 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ // Features that operate on Sentence objects. Most features are defined
17
+ // in this header so they may be re-used via composition into other more
18
+ // advanced feature classes.
19
+
20
+ #ifndef SENTENCE_FEATURES_H_
21
+ #define SENTENCE_FEATURES_H_
22
+
23
+ #include "feature_extractor.h"
24
+ #include "cld_3/protos/sentence.pb.h"
25
+
26
+ namespace chrome_lang_id {
27
+
28
+ // Feature function that extracts features for the full Sentence.
29
+ typedef FeatureFunction<Sentence> WholeSentenceFeature;
30
+
31
+ typedef FeatureExtractor<Sentence> WholeSentenceExtractor;
32
+
33
+ } // namespace chrome_lang_id
34
+
35
+ #endif // SENTENCE_FEATURES_H_
@@ -0,0 +1,72 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #ifndef SIMPLE_ADDER_H_
17
+ #define SIMPLE_ADDER_H_
18
+
19
+ #include "base.h"
20
+
21
+ namespace chrome_lang_id {
22
+
23
+ // Class for adding (possibly) scaled arrays.
24
+ class SimpleAdder {
25
+ public:
26
+ static constexpr const int kNumFloatsPerBatch = 1;
27
+
28
+ CLD3_ATTRIBUTE_ALWAYS_INLINE SimpleAdder(float *dest, int num_floats)
29
+ : dest_(dest), num_floats_(num_floats) {}
30
+
31
+ CLD3_ATTRIBUTE_ALWAYS_INLINE ~SimpleAdder() {
32
+ // Should call Finalize function before destruction.
33
+ CLD3_DCHECK(dest_ == nullptr);
34
+ }
35
+
36
+ // Caller must call this function before calling deconstruct this object.
37
+ CLD3_ATTRIBUTE_ALWAYS_INLINE void Finalize() { dest_ = nullptr; }
38
+
39
+ CLD3_ATTRIBUTE_ALWAYS_INLINE void LazyAdd(const float *source) const {
40
+ AddImpl(source, num_floats_, dest_);
41
+ }
42
+
43
+ CLD3_ATTRIBUTE_ALWAYS_INLINE void LazyScaleAdd(const float *source,
44
+ const float scale) const {
45
+ ScaleAddImpl(source, num_floats_, scale, dest_);
46
+ }
47
+
48
+ // Simple fast while loop to implement dest += source.
49
+ CLD3_ATTRIBUTE_ALWAYS_INLINE static void AddImpl(
50
+ const float *__restrict source, uint32 size, float *__restrict dest) {
51
+ for (uint32 i = 0; i < size; ++i) {
52
+ dest[i] += source[i];
53
+ }
54
+ }
55
+
56
+ // Simple fast while loop to implement dest += scale * source.
57
+ CLD3_ATTRIBUTE_ALWAYS_INLINE static void ScaleAddImpl(
58
+ const float *__restrict source, uint32 size, const float scale,
59
+ float *__restrict dest) {
60
+ for (uint32 i = 0; i < size; ++i) {
61
+ dest[i] += source[i] * scale;
62
+ }
63
+ }
64
+
65
+ private:
66
+ float *dest_;
67
+ int num_floats_;
68
+ };
69
+
70
+ } // namespace chrome_lang_id
71
+
72
+ #endif // SIMPLE_ADDER_H_