cld3 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +7 -0
  2. data/Gemfile +18 -0
  3. data/LICENSE +204 -0
  4. data/LICENSE_CLD3 +203 -0
  5. data/README.md +22 -0
  6. data/cld3.gemspec +35 -0
  7. data/ext/cld3/base.cc +36 -0
  8. data/ext/cld3/base.h +106 -0
  9. data/ext/cld3/casts.h +98 -0
  10. data/ext/cld3/embedding_feature_extractor.cc +51 -0
  11. data/ext/cld3/embedding_feature_extractor.h +182 -0
  12. data/ext/cld3/embedding_network.cc +196 -0
  13. data/ext/cld3/embedding_network.h +186 -0
  14. data/ext/cld3/embedding_network_params.h +285 -0
  15. data/ext/cld3/extconf.rb +49 -0
  16. data/ext/cld3/feature_extractor.cc +137 -0
  17. data/ext/cld3/feature_extractor.h +633 -0
  18. data/ext/cld3/feature_extractor.proto +50 -0
  19. data/ext/cld3/feature_types.cc +72 -0
  20. data/ext/cld3/feature_types.h +158 -0
  21. data/ext/cld3/fixunicodevalue.cc +55 -0
  22. data/ext/cld3/fixunicodevalue.h +69 -0
  23. data/ext/cld3/float16.h +58 -0
  24. data/ext/cld3/fml_parser.cc +308 -0
  25. data/ext/cld3/fml_parser.h +123 -0
  26. data/ext/cld3/generated_entities.cc +296 -0
  27. data/ext/cld3/generated_ulscript.cc +678 -0
  28. data/ext/cld3/generated_ulscript.h +142 -0
  29. data/ext/cld3/getonescriptspan.cc +1109 -0
  30. data/ext/cld3/getonescriptspan.h +124 -0
  31. data/ext/cld3/integral_types.h +37 -0
  32. data/ext/cld3/lang_id_nn_params.cc +57449 -0
  33. data/ext/cld3/lang_id_nn_params.h +178 -0
  34. data/ext/cld3/language_identifier_features.cc +165 -0
  35. data/ext/cld3/language_identifier_features.h +116 -0
  36. data/ext/cld3/nnet_language_identifier.cc +380 -0
  37. data/ext/cld3/nnet_language_identifier.h +175 -0
  38. data/ext/cld3/nnet_language_identifier_c.cc +72 -0
  39. data/ext/cld3/offsetmap.cc +478 -0
  40. data/ext/cld3/offsetmap.h +168 -0
  41. data/ext/cld3/port.h +143 -0
  42. data/ext/cld3/registry.cc +28 -0
  43. data/ext/cld3/registry.h +242 -0
  44. data/ext/cld3/relevant_script_feature.cc +89 -0
  45. data/ext/cld3/relevant_script_feature.h +49 -0
  46. data/ext/cld3/script_detector.h +156 -0
  47. data/ext/cld3/sentence.proto +77 -0
  48. data/ext/cld3/sentence_features.cc +29 -0
  49. data/ext/cld3/sentence_features.h +35 -0
  50. data/ext/cld3/simple_adder.h +72 -0
  51. data/ext/cld3/stringpiece.h +81 -0
  52. data/ext/cld3/task_context.cc +161 -0
  53. data/ext/cld3/task_context.h +81 -0
  54. data/ext/cld3/task_context_params.cc +74 -0
  55. data/ext/cld3/task_context_params.h +54 -0
  56. data/ext/cld3/task_spec.proto +98 -0
  57. data/ext/cld3/text_processing.cc +245 -0
  58. data/ext/cld3/text_processing.h +30 -0
  59. data/ext/cld3/unicodetext.cc +96 -0
  60. data/ext/cld3/unicodetext.h +144 -0
  61. data/ext/cld3/utf8acceptinterchange.h +486 -0
  62. data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
  63. data/ext/cld3/utf8repl_lettermarklower.h +758 -0
  64. data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
  65. data/ext/cld3/utf8statetable.cc +1344 -0
  66. data/ext/cld3/utf8statetable.h +285 -0
  67. data/ext/cld3/utils.cc +241 -0
  68. data/ext/cld3/utils.h +144 -0
  69. data/ext/cld3/workspace.cc +64 -0
  70. data/ext/cld3/workspace.h +177 -0
  71. data/lib/cld3.rb +99 -0
  72. metadata +158 -0
@@ -0,0 +1,72 @@
1
+ /* Copyright 2017 Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
2
+ All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ ==============================================================================*/
16
+
17
+ #include <cstddef>
18
+ #include <iostream>
19
+ #include "nnet_language_identifier.h"
20
+
21
+ #if defined _WIN32 || defined __CYGWIN__
22
+ #define EXPORT __declspec(dllexport)
23
+ #else
24
+ #define EXPORT __attribute__ ((visibility ("default")))
25
+ #endif
26
+
27
+ class NNetLanguageIdentifier : public chrome_lang_id::NNetLanguageIdentifier {
28
+ public:
29
+ inline NNetLanguageIdentifier(int min_num_bytes, int max_num_bytes)
30
+ : chrome_lang_id::NNetLanguageIdentifier(min_num_bytes, max_num_bytes)
31
+ {
32
+ }
33
+
34
+ std::string language;
35
+ };
36
+
37
+ extern "C" {
38
+ #include <stddef.h>
39
+
40
+ struct result {  // Plain struct returned by value from NNetLanguageIdentifier_find_language().
41
+ struct {  // Pointer/length pair describing the detected language string.
42
+ const char *data;  // Filled from instance->language.data(); the bytes are owned by the identifier instance, not by the caller.
43
+ size_t size;  // Filled from instance->language.size().
44
+ } language;
45
+ float probability;  // Copied from the `probability` field of the FindLanguage() result.
46
+ float proportion;  // Copied from the `proportion` field of the FindLanguage() result.
47
+ bool is_reliable;  // Copied from the `is_reliable` field of the FindLanguage() result.
48
+ };
49
+
50
+ EXPORT struct result NNetLanguageIdentifier_find_language(void *pointer,
51
+ const char *data,
52
+ size_t size) {
53
+ auto instance = reinterpret_cast<NNetLanguageIdentifier *>(pointer);
54
+ auto result = instance->FindLanguage(std::string(data, size));
55
+ instance->language = std::move(result.language);
56
+
57
+ return (struct result) {
58
+ { instance->language.data(), instance->language.size() },
59
+ std::move(result.probability),
60
+ std::move(result.proportion),
61
+ std::move(result.is_reliable)
62
+ };
63
+ }
64
+
65
+ EXPORT void delete_NNetLanguageIdentifier(void *pointer) {
66
+ delete reinterpret_cast<NNetLanguageIdentifier *>(pointer);
67
+ }
68
+
69
+ EXPORT void *new_NNetLanguageIdentifier(int min_num_bytes, int max_num_bytes) {
70
+ return new NNetLanguageIdentifier(min_num_bytes, max_num_bytes);
71
+ }
72
+ }
@@ -0,0 +1,478 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ //
16
+ // Author: dsites@google.com (Dick Sites)
17
+ //
18
+ //
19
+
20
+ #include "offsetmap.h"
21
+
22
+ #include <string.h> // for strcmp
23
+ #include <algorithm> // for min
24
+
25
+ using namespace std;
26
+
27
+ namespace chrome_lang_id {
28
+ namespace CLD2 {
29
+
30
+ // Constructor, destructor
31
+ OffsetMap::OffsetMap() {
32
+ Clear();
33
+ }
34
+
35
+ OffsetMap::~OffsetMap() {
36
+ }
37
+
38
+ // Clear the map
39
+ // After:
40
+ // next_diff_sub_ is 0
41
+ // Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1]
42
+ // which is a fake range of width 0 mapping 0=>0
43
+ void OffsetMap::Clear() {
44
+ diffs_.clear();
45
+ pending_op_ = COPY_OP;
46
+ pending_length_ = 0;
47
+ next_diff_sub_ = 0;
48
+ current_lo_aoffset_ = 0;
49
+ current_hi_aoffset_ = 0;
50
+ current_lo_aprimeoffset_ = 0;
51
+ current_hi_aprimeoffset_ = 0;
52
+ current_diff_ = 0;
53
+ max_aoffset_ = 0; // Largest seen so far
54
+ max_aprimeoffset_ = 0; // Largest seen so far
55
+ }
56
+
57
// Returns the operation code held in the top two bits of a diffs_ byte
// (a value in 0..3).
// The byte is viewed as unsigned before shifting: right-shifting a negative
// signed value is implementation-defined before C++20, and plain char may be
// signed.
static inline char OpPart(const char c) {
  const unsigned char bits = static_cast<unsigned char>(c);
  return static_cast<char>((bits >> 6) & 3);
}
60
// Returns the low six bits of a diffs_ byte, i.e. its length field (0..63).
static inline char LenPart(const char c) {
  const char length_mask = 0x3f;
  return c & length_mask;
}
63
+
64
+ // Reset to offset 0
65
+ void OffsetMap::Reset() {
66
+ MaybeFlushAll();
67
+
68
+ next_diff_sub_ = 0;
69
+ current_lo_aoffset_ = 0;
70
+ current_hi_aoffset_ = 0;
71
+ current_lo_aprimeoffset_ = 0;
72
+ current_hi_aprimeoffset_ = 0;
73
+ current_diff_ = 0;
74
+ }
75
+
76
+ // Add to mapping from A to A', specifying how many next bytes are
77
+ // identical in A and A'
78
+ void OffsetMap::Copy(int bytes) {
79
+ if (bytes == 0) {return;}
80
+ max_aoffset_ += bytes; // Largest seen so far
81
+ max_aprimeoffset_ += bytes; // Largest seen so far
82
+ if (pending_op_ == COPY_OP) {
83
+ pending_length_ += bytes;
84
+ } else {
85
+ Flush();
86
+ pending_op_ = COPY_OP;
87
+ pending_length_ = bytes;
88
+ }
89
+ }
90
+
91
+ // Add to mapping from A to A', specifying how many next bytes are
92
+ // inserted in A' while not advancing in A at all
93
+ void OffsetMap::Insert(int bytes){
94
+ if (bytes == 0) {return;}
95
+ max_aprimeoffset_ += bytes; // Largest seen so far
96
+ if (pending_op_ == INSERT_OP) {
97
+ pending_length_ += bytes;
98
+ } else if ((bytes == 1) &&
99
+ (pending_op_ == DELETE_OP) && (pending_length_ == 1)) {
100
+ // Special-case exactly delete(1) insert(1) +> copy(1);
101
+ // all others backmap inserts to after deletes
102
+ pending_op_ = COPY_OP;
103
+ } else {
104
+ Flush();
105
+ pending_op_ = INSERT_OP;
106
+ pending_length_ = bytes;
107
+ }
108
+ }
109
+
110
+ // Add to mapping from A to A', specifying how many next bytes are
111
+ // deleted from A while not advancing in A' at all
112
+ void OffsetMap::Delete(int bytes){
113
+ if (bytes == 0) {return;}
114
+ max_aoffset_ += bytes; // Largest seen so far
115
+ if (pending_op_ == DELETE_OP) {
116
+ pending_length_ += bytes;
117
+ } else if ((bytes == 1) &&
118
+ (pending_op_ == INSERT_OP) && (pending_length_ == 1)) {
119
+ // Special-case exactly insert(1) delete(1) => copy(1);
120
+ // all others backmap deletes to after insertss
121
+ pending_op_ = COPY_OP;
122
+ } else {
123
+ Flush();
124
+ pending_op_ = DELETE_OP;
125
+ pending_length_ = bytes;
126
+ }
127
+ }
128
+
129
+ void OffsetMap::Flush() {
130
+ if (pending_length_ == 0) {
131
+ return;
132
+ }
133
+ // We may be emitting a copy op just after a copy op because +1 -1 cancelled
134
+ // inbetween. If the lengths don't need a prefix byte, combine them
135
+ if ((pending_op_ == COPY_OP) && !diffs_.empty()) {
136
+ char c = diffs_[diffs_.size() - 1];
137
+ MapOp prior_op = static_cast<MapOp>(OpPart(c));
138
+ int prior_len = LenPart(c);
139
+ if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) {
140
+ diffs_[diffs_.size() - 1] += pending_length_;
141
+ pending_length_ = 0;
142
+ return;
143
+ }
144
+ }
145
+ if (pending_length_ > 0x3f) {
146
+ bool non_zero_emitted = false;
147
+ for (int shift = 30; shift > 0; shift -= 6) {
148
+ int prefix = (pending_length_ >> shift) & 0x3f;
149
+ if ((prefix > 0) || non_zero_emitted) {
150
+ Emit(PREFIX_OP, prefix);
151
+ non_zero_emitted = true;
152
+ }
153
+ }
154
+ }
155
+ Emit(pending_op_, pending_length_ & 0x3f);
156
+ pending_length_ = 0;
157
+ }
158
+
159
+
160
+ // Add one more entry to copy one byte off the end, then flush
161
+ void OffsetMap::FlushAll() {
162
+ Copy(1);
163
+ Flush();
164
+ }
165
+
166
+ // Flush all if necessary
167
+ void OffsetMap::MaybeFlushAll() {
168
+ if ((0 < pending_length_) || diffs_.empty()) {
169
+ FlushAll();
170
+ }
171
+ }
172
+
173
+ // Len may be 0, for example as the low piece of length=64
174
+ void OffsetMap::Emit(MapOp op, int len) {
175
+ char c = (static_cast<char>(op) << 6) | (len & 0x3f);
176
+ diffs_.push_back(c);
177
+ }
178
+
179
+ //----------------------------------------------------------------------------//
180
+ // The guts of the 2013 design //
181
+ // If there are three ranges a b c in diffs_, we can be in one of five //
182
+ // states: LEFT of a, in ranges a b c, or RIGHT of c //
183
+ // In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_ //
184
+ // position next_diff_sub_ //
185
+ // There also are mapping constants max_aoffset_ and max_aprimeoffset_ //
186
+ // If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0 //
187
+ // If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and //
188
+ // next_diff_sub_=diffs_.size() //
189
+ // Otherwise, at least one of A[) and A'[) is non-empty and the first bytes //
190
+ // correspond to each other. If range i is active, next_diff_sub_ is at //
191
+ // the first byte of range i+1. Because of the length-prefix operator, //
192
+ // an individual range item in diffs_ may be multiple bytes //
193
+ // In all cases aprimeoffset = aoffset + current_diff_ //
194
+ // i.e. current_diff_ = aprimeoffset - aoffset //
195
+ // //
196
+ // In the degenerate case of diffs_.empty(), there are only two states //
197
+ // LEFT and RIGHT and the mapping is the identity mapping. //
198
+ // The initial state is LEFT. //
199
+ // It is an error to move left into LEFT or right into RIGHT, but the code //
200
+ // below is robust in these cases. //
201
+ //----------------------------------------------------------------------------//
202
+
203
+ void OffsetMap::SetLeft() {
204
+ current_lo_aoffset_ = 0;
205
+ current_hi_aoffset_ = 0;
206
+ current_lo_aprimeoffset_ = 0;
207
+ current_hi_aprimeoffset_ = 0;
208
+ current_diff_ = 0;
209
+ next_diff_sub_ = 0;
210
+ }
211
+
212
+ void OffsetMap::SetRight() {
213
+ current_lo_aoffset_ = max_aoffset_;
214
+ current_hi_aoffset_ = max_aoffset_;
215
+ current_lo_aprimeoffset_ = max_aprimeoffset_;
216
+ current_hi_aprimeoffset_ = max_aprimeoffset_;
217
+ current_diff_ = max_aprimeoffset_ - max_aoffset_;
218
+ next_diff_sub_ = 0;
219
+ }
220
+
221
+ // Back up over previous range, 1..5 bytes
222
+ // Return subscript at the beginning of that. Pins at 0
223
+ int OffsetMap::Backup(int sub) {
224
+ if (sub <= 0) {return 0;}
225
+ --sub;
226
+ while ((0 < sub) &&
227
+ (static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) {
228
+ --sub;
229
+ }
230
+ return sub;
231
+ }
232
+
233
+ // Parse next range, 1..5 bytes
234
+ // Return subscript just off the end of that
235
+ int OffsetMap::ParseNext(int sub, MapOp* op, int* length) {
236
+ *op = PREFIX_OP;
237
+ *length = 0;
238
+ char c;
239
+ while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) {
240
+ c = diffs_[sub++];
241
+ *op = static_cast<MapOp>(OpPart(c));
242
+ int len = LenPart(c);
243
+ *length = (*length << 6) + len;
244
+ }
245
+ // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
246
+ // Mal-formed can include a trailing prefix byte with no following op
247
+ return sub;
248
+ }
249
+
250
+ // Parse previous range, 1..5 bytes
251
+ // Return current subscript
252
+ int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) {
253
+ sub = Backup(sub);
254
+ return ParseNext(sub, op, length);
255
+ }
256
+
257
+ // Move active window one range to the right
258
+ // Return true if move was OK
259
+ bool OffsetMap::MoveRight() {
260
+ // If at last range or RIGHT, set to RIGHT, return error
261
+ if (next_diff_sub_ >= static_cast<int>(diffs_.size())) {
262
+ SetRight();
263
+ return false;
264
+ }
265
+ // Actually OK to move right
266
+ MapOp op;
267
+ int length;
268
+ bool retval = true;
269
+ // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
270
+ next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length);
271
+
272
+ current_lo_aoffset_ = current_hi_aoffset_;
273
+ current_lo_aprimeoffset_ = current_hi_aprimeoffset_;
274
+ if (op == COPY_OP) {
275
+ current_hi_aoffset_ = current_lo_aoffset_ + length;
276
+ current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
277
+ } else if (op == INSERT_OP) {
278
+ current_hi_aoffset_ = current_lo_aoffset_ + 0;
279
+ current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
280
+ } else if (op == DELETE_OP) {
281
+ current_hi_aoffset_ = current_lo_aoffset_ + length;
282
+ current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0;
283
+ } else {
284
+ SetRight();
285
+ retval = false;
286
+ }
287
+ current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
288
+ return retval;
289
+ }
290
+
291
+ // Move active window one range to the left
292
+ // Return true if move was OK
293
+ bool OffsetMap::MoveLeft() {
294
+ // If at first range or LEFT, set to LEFT, return error
295
+ if (next_diff_sub_ <= 0) {
296
+ SetLeft();
297
+ return false;
298
+ }
299
+ // Back up over current active window
300
+ next_diff_sub_ = Backup(next_diff_sub_);
301
+ if (next_diff_sub_ <= 0) {
302
+ SetLeft();
303
+ return false;
304
+ }
305
+ // Actually OK to move left
306
+ MapOp op;
307
+ int length;
308
+
309
+ // TODO(abakalov): 'retval' below is set but not used, which is suspicious.
310
+ // Did the authors mean to return this variable, analogously to MoveRight()?
311
+ // bool retval = true;
312
+ // If mal-formed or in LEFT, this will return with op = PREFIX_OP
313
+ next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length);
314
+
315
+ current_hi_aoffset_ = current_lo_aoffset_;
316
+ current_hi_aprimeoffset_ = current_lo_aprimeoffset_;
317
+ if (op == COPY_OP) {
318
+ current_lo_aoffset_ = current_hi_aoffset_ - length;
319
+ current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
320
+ } else if (op == INSERT_OP) {
321
+ current_lo_aoffset_ = current_hi_aoffset_ - 0;
322
+ current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
323
+ } else if (op == DELETE_OP) {
324
+ current_lo_aoffset_ = current_hi_aoffset_ - length;
325
+ current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0;
326
+ } else {
327
+ SetLeft();
328
+ // retval = false;
329
+ }
330
+ current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
331
+ return true;
332
+ }
333
+
334
+ // Map an offset in A' to the corresponding offset in A
335
+ int OffsetMap::MapBack(int aprimeoffset){
336
+ MaybeFlushAll();
337
+ if (aprimeoffset < 0) {return 0;}
338
+ if (max_aprimeoffset_ <= aprimeoffset) {
339
+ return (aprimeoffset - max_aprimeoffset_) + max_aoffset_;
340
+ }
341
+
342
+ // If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_,
343
+ // use current mapping, else move window left/right
344
+ bool ok = true;
345
+ while (ok && (aprimeoffset < current_lo_aprimeoffset_)) {
346
+ ok = MoveLeft();
347
+ }
348
+ while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) {
349
+ ok = MoveRight();
350
+ }
351
+ // So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_
352
+
353
+ int aoffset = aprimeoffset - current_diff_;
354
+ if (aoffset >= current_hi_aoffset_) {
355
+ // A' is in an insert region, all bytes of which backmap to A=hi_aoffset_
356
+ aoffset = current_hi_aoffset_;
357
+ }
358
+ return aoffset;
359
+ }
360
+
361
+ // Map an offset in A to the corresponding offset in A'
362
+ int OffsetMap::MapForward(int aoffset){
363
+ MaybeFlushAll();
364
+ if (aoffset < 0) {return 0;}
365
+ if (max_aoffset_ <= aoffset) {
366
+ return (aoffset - max_aoffset_) + max_aprimeoffset_;
367
+ }
368
+
369
+ // If current_lo_aoffset_ <= aoffset < current_hi_aoffset_,
370
+ // use current mapping, else move window left/right
371
+ bool ok = true;
372
+ while (ok && (aoffset < current_lo_aoffset_)) {
373
+ ok = MoveLeft();
374
+ }
375
+ while (ok && (current_hi_aoffset_ <= aoffset)) {
376
+ ok = MoveRight();
377
+ }
378
+
379
+ int aprimeoffset = aoffset + current_diff_;
380
+ if (aprimeoffset >= current_hi_aprimeoffset_) {
381
+ // A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_
382
+ aprimeoffset = current_hi_aprimeoffset_;
383
+ }
384
+ return aprimeoffset;
385
+ }
386
+
387
+
388
+ // static
389
+ bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) {
390
+ bool ok = true;
391
+ while (ok && (source->next_diff_sub_ !=
392
+ static_cast<int>(source->diffs_.size()))) {
393
+ ok = source->MoveRight();
394
+ if (source->current_lo_aoffset_ != source->current_hi_aoffset_) {
395
+ return false;
396
+ }
397
+ dest->Insert(
398
+ source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_);
399
+ }
400
+ return true;
401
+ }
402
+
403
+ // static
404
+ bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) {
405
+ bool ok = true;
406
+ while (ok && (source->next_diff_sub_ !=
407
+ static_cast<int>(source->diffs_.size()))) {
408
+ ok = source->MoveRight();
409
+ if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) {
410
+ return false;
411
+ }
412
+ dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_);
413
+ }
414
+ return true;
415
+ }
416
+
417
+ // static. Builds in h the composition of f followed by g: f's A' offsets are matched against g's A offsets, so h presumably maps f's A to g's A' (A'' below).
418
+ void OffsetMap::ComposeOffsetMap(
419
+ OffsetMap* g, OffsetMap* f, OffsetMap* h) {
420
+ h->Clear();  // Start from an empty output map.
421
+ f->Reset();  // Reset() flushes any pending op and rewinds the window to offset 0.
422
+ g->Reset();
423
+
424
+ int lo = 0;  // Position in the shared middle space (f's A' == g's A) composed so far.
425
+ for (;;) {
426
+ // When g's current window is used up, advance g, replaying its pure-insert ranges (A'' moves, A and A' do not) into h.
427
+ // CopyInserts() returns true only when g has no further ranges; composition is then finished.
428
+ if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) {
429
+ if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) {
430
+ // fprintf(stderr,
431
+ // "ComposeOffsetMap ERROR, f is longer than g.<br>\n");
432
+ }
433
+
434
+ // FlushAll(), called by Reset(), MapForward() or MapBack(), has
435
+ // added an extra COPY_OP to f and g, so this function has
436
+ // composed an extra COPY_OP in h from those. To keep
437
+ // FlushAll() from adding one more extra COPY_OP to h later, call
438
+ // Flush() right now.
439
+ h->Flush();
440
+ return;
441
+ }
442
+
443
+ // When f's current window is used up, advance f, replaying its pure-delete ranges (A moves, A' and A'' do not)
444
+ // into h. CopyDeletes() stops at the first range of f that advances A'.
445
+ if (lo >= f->current_hi_aprimeoffset_) {
446
+ if (!CopyDeletes(f, h)) {
447
+ // fprintf(stderr,
448
+ // "ComposeOffsetMap ERROR, g is longer than f.<br>\n");
449
+ }
450
+ }
451
+
452
+ // Compose one operation which moves A' from lo to hi, where hi is the nearer end of the two current windows.
453
+ int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_);
454
+ if (f->current_lo_aoffset_ != f->current_hi_aoffset_ &&
455
+ g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
456
+ h->Copy(hi - lo);  // f's window advances A and g's window advances A'': both ends move.
457
+ } else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) {
458
+ h->Delete(hi - lo);  // Only f's window advances A.
459
+ } else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
460
+ h->Insert(hi - lo);  // Only g's window advances A''.
461
+ }
462
+
463
+ lo = hi;
464
+ }
465
+ }
466
+
467
+ // For testing only -- force a mapping
468
+ void OffsetMap::StuffIt(const std::string& diffs,
469
+ int max_aoffset, int max_aprimeoffset) {
470
+ Clear();
471
+ diffs_ = diffs;
472
+ max_aoffset_ = max_aoffset;
473
+ max_aprimeoffset_ = max_aprimeoffset;
474
+ }
475
+
476
+
477
+ } // namespace CLD2
478
+ } // namespace chrome_lang_id