cld3 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +18 -0
  3. data/LICENSE +204 -0
  4. data/LICENSE_CLD3 +203 -0
  5. data/README.md +22 -0
  6. data/cld3.gemspec +35 -0
  7. data/ext/cld3/base.cc +36 -0
  8. data/ext/cld3/base.h +106 -0
  9. data/ext/cld3/casts.h +98 -0
  10. data/ext/cld3/embedding_feature_extractor.cc +51 -0
  11. data/ext/cld3/embedding_feature_extractor.h +182 -0
  12. data/ext/cld3/embedding_network.cc +196 -0
  13. data/ext/cld3/embedding_network.h +186 -0
  14. data/ext/cld3/embedding_network_params.h +285 -0
  15. data/ext/cld3/extconf.rb +49 -0
  16. data/ext/cld3/feature_extractor.cc +137 -0
  17. data/ext/cld3/feature_extractor.h +633 -0
  18. data/ext/cld3/feature_extractor.proto +50 -0
  19. data/ext/cld3/feature_types.cc +72 -0
  20. data/ext/cld3/feature_types.h +158 -0
  21. data/ext/cld3/fixunicodevalue.cc +55 -0
  22. data/ext/cld3/fixunicodevalue.h +69 -0
  23. data/ext/cld3/float16.h +58 -0
  24. data/ext/cld3/fml_parser.cc +308 -0
  25. data/ext/cld3/fml_parser.h +123 -0
  26. data/ext/cld3/generated_entities.cc +296 -0
  27. data/ext/cld3/generated_ulscript.cc +678 -0
  28. data/ext/cld3/generated_ulscript.h +142 -0
  29. data/ext/cld3/getonescriptspan.cc +1109 -0
  30. data/ext/cld3/getonescriptspan.h +124 -0
  31. data/ext/cld3/integral_types.h +37 -0
  32. data/ext/cld3/lang_id_nn_params.cc +57449 -0
  33. data/ext/cld3/lang_id_nn_params.h +178 -0
  34. data/ext/cld3/language_identifier_features.cc +165 -0
  35. data/ext/cld3/language_identifier_features.h +116 -0
  36. data/ext/cld3/nnet_language_identifier.cc +380 -0
  37. data/ext/cld3/nnet_language_identifier.h +175 -0
  38. data/ext/cld3/nnet_language_identifier_c.cc +72 -0
  39. data/ext/cld3/offsetmap.cc +478 -0
  40. data/ext/cld3/offsetmap.h +168 -0
  41. data/ext/cld3/port.h +143 -0
  42. data/ext/cld3/registry.cc +28 -0
  43. data/ext/cld3/registry.h +242 -0
  44. data/ext/cld3/relevant_script_feature.cc +89 -0
  45. data/ext/cld3/relevant_script_feature.h +49 -0
  46. data/ext/cld3/script_detector.h +156 -0
  47. data/ext/cld3/sentence.proto +77 -0
  48. data/ext/cld3/sentence_features.cc +29 -0
  49. data/ext/cld3/sentence_features.h +35 -0
  50. data/ext/cld3/simple_adder.h +72 -0
  51. data/ext/cld3/stringpiece.h +81 -0
  52. data/ext/cld3/task_context.cc +161 -0
  53. data/ext/cld3/task_context.h +81 -0
  54. data/ext/cld3/task_context_params.cc +74 -0
  55. data/ext/cld3/task_context_params.h +54 -0
  56. data/ext/cld3/task_spec.proto +98 -0
  57. data/ext/cld3/text_processing.cc +245 -0
  58. data/ext/cld3/text_processing.h +30 -0
  59. data/ext/cld3/unicodetext.cc +96 -0
  60. data/ext/cld3/unicodetext.h +144 -0
  61. data/ext/cld3/utf8acceptinterchange.h +486 -0
  62. data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
  63. data/ext/cld3/utf8repl_lettermarklower.h +758 -0
  64. data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
  65. data/ext/cld3/utf8statetable.cc +1344 -0
  66. data/ext/cld3/utf8statetable.h +285 -0
  67. data/ext/cld3/utils.cc +241 -0
  68. data/ext/cld3/utils.h +144 -0
  69. data/ext/cld3/workspace.cc +64 -0
  70. data/ext/cld3/workspace.h +177 -0
  71. data/lib/cld3.rb +99 -0
  72. metadata +158 -0
@@ -0,0 +1,72 @@
1
+ /* Copyright 2017 Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
2
+ All Rights Reserved.
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ ==============================================================================*/
16
+
17
+ #include <cstddef>
18
+ #include <iostream>
19
+ #include "nnet_language_identifier.h"
20
+
21
+ #if defined _WIN32 || defined __CYGWIN__
22
+ #define EXPORT __declspec(dllexport)
23
+ #else
24
+ #define EXPORT __attribute__ ((visibility ("default")))
25
+ #endif
26
+
27
+ class NNetLanguageIdentifier : public chrome_lang_id::NNetLanguageIdentifier {
28
+ public:
29
+ inline NNetLanguageIdentifier(int min_num_bytes, int max_num_bytes)
30
+ : chrome_lang_id::NNetLanguageIdentifier(min_num_bytes, max_num_bytes)
31
+ {
32
+ }
33
+
34
+ std::string language;
35
+ };
36
+
37
+ extern "C" {
38
+ #include <stddef.h>
39
+
40
// Plain-data record returned by value from
// NNetLanguageIdentifier_find_language.
struct result {
  // Detected language as a pointer/length pair.
  struct {
    const char *data;
    size_t size;
  } language;

  float probability;
  float proportion;
  bool is_reliable;
};
49
+
50
+ EXPORT struct result NNetLanguageIdentifier_find_language(void *pointer,
51
+ const char *data,
52
+ size_t size) {
53
+ auto instance = reinterpret_cast<NNetLanguageIdentifier *>(pointer);
54
+ auto result = instance->FindLanguage(std::string(data, size));
55
+ instance->language = std::move(result.language);
56
+
57
+ return (struct result) {
58
+ { instance->language.data(), instance->language.size() },
59
+ std::move(result.probability),
60
+ std::move(result.proportion),
61
+ std::move(result.is_reliable)
62
+ };
63
+ }
64
+
65
+ EXPORT void delete_NNetLanguageIdentifier(void *pointer) {
66
+ delete reinterpret_cast<NNetLanguageIdentifier *>(pointer);
67
+ }
68
+
69
+ EXPORT void *new_NNetLanguageIdentifier(int min_num_bytes, int max_num_bytes) {
70
+ return new NNetLanguageIdentifier(min_num_bytes, max_num_bytes);
71
+ }
72
+ }
@@ -0,0 +1,478 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ //
16
+ // Author: dsites@google.com (Dick Sites)
17
+ //
18
+ //
19
+
20
+ #include "offsetmap.h"
21
+
22
+ #include <string.h> // for strcmp
23
+ #include <algorithm> // for min
24
+
25
+ using namespace std;
26
+
27
+ namespace chrome_lang_id {
28
+ namespace CLD2 {
29
+
30
+ // Constructor, destructor
31
+ OffsetMap::OffsetMap() {
32
+ Clear();
33
+ }
34
+
35
+ OffsetMap::~OffsetMap() {
36
+ }
37
+
38
+ // Clear the map
39
+ // After:
40
+ // next_diff_sub_ is 0
41
+ // Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1]
42
+ // which is a fake range of width 0 mapping 0=>0
43
+ void OffsetMap::Clear() {
44
+ diffs_.clear();
45
+ pending_op_ = COPY_OP;
46
+ pending_length_ = 0;
47
+ next_diff_sub_ = 0;
48
+ current_lo_aoffset_ = 0;
49
+ current_hi_aoffset_ = 0;
50
+ current_lo_aprimeoffset_ = 0;
51
+ current_hi_aprimeoffset_ = 0;
52
+ current_diff_ = 0;
53
+ max_aoffset_ = 0; // Largest seen so far
54
+ max_aprimeoffset_ = 0; // Largest seen so far
55
+ }
56
+
57
// Returns the top two bits of a diff byte as a value in 0..3.
static inline char OpPart(const char c) {
  const unsigned char bits = static_cast<unsigned char>(c);
  return static_cast<char>(bits >> 6);
}
60
// Returns the low six bits of a diff byte as a value in 0..63.
static inline char LenPart(const char c) {
  const int kLowSixBits = 0x3f;
  return static_cast<char>(c & kLowSixBits);
}
63
+
64
+ // Reset to offset 0
65
+ void OffsetMap::Reset() {
66
+ MaybeFlushAll();
67
+
68
+ next_diff_sub_ = 0;
69
+ current_lo_aoffset_ = 0;
70
+ current_hi_aoffset_ = 0;
71
+ current_lo_aprimeoffset_ = 0;
72
+ current_hi_aprimeoffset_ = 0;
73
+ current_diff_ = 0;
74
+ }
75
+
76
+ // Add to mapping from A to A', specifying how many next bytes are
77
+ // identical in A and A'
78
+ void OffsetMap::Copy(int bytes) {
79
+ if (bytes == 0) {return;}
80
+ max_aoffset_ += bytes; // Largest seen so far
81
+ max_aprimeoffset_ += bytes; // Largest seen so far
82
+ if (pending_op_ == COPY_OP) {
83
+ pending_length_ += bytes;
84
+ } else {
85
+ Flush();
86
+ pending_op_ = COPY_OP;
87
+ pending_length_ = bytes;
88
+ }
89
+ }
90
+
91
+ // Add to mapping from A to A', specifying how many next bytes are
92
+ // inserted in A' while not advancing in A at all
93
+ void OffsetMap::Insert(int bytes){
94
+ if (bytes == 0) {return;}
95
+ max_aprimeoffset_ += bytes; // Largest seen so far
96
+ if (pending_op_ == INSERT_OP) {
97
+ pending_length_ += bytes;
98
+ } else if ((bytes == 1) &&
99
+ (pending_op_ == DELETE_OP) && (pending_length_ == 1)) {
100
+ // Special-case exactly delete(1) insert(1) +> copy(1);
101
+ // all others backmap inserts to after deletes
102
+ pending_op_ = COPY_OP;
103
+ } else {
104
+ Flush();
105
+ pending_op_ = INSERT_OP;
106
+ pending_length_ = bytes;
107
+ }
108
+ }
109
+
110
+ // Add to mapping from A to A', specifying how many next bytes are
111
+ // deleted from A while not advancing in A' at all
112
+ void OffsetMap::Delete(int bytes){
113
+ if (bytes == 0) {return;}
114
+ max_aoffset_ += bytes; // Largest seen so far
115
+ if (pending_op_ == DELETE_OP) {
116
+ pending_length_ += bytes;
117
+ } else if ((bytes == 1) &&
118
+ (pending_op_ == INSERT_OP) && (pending_length_ == 1)) {
119
+ // Special-case exactly insert(1) delete(1) => copy(1);
120
+ // all others backmap deletes to after insertss
121
+ pending_op_ = COPY_OP;
122
+ } else {
123
+ Flush();
124
+ pending_op_ = DELETE_OP;
125
+ pending_length_ = bytes;
126
+ }
127
+ }
128
+
129
+ void OffsetMap::Flush() {
130
+ if (pending_length_ == 0) {
131
+ return;
132
+ }
133
+ // We may be emitting a copy op just after a copy op because +1 -1 cancelled
134
+ // inbetween. If the lengths don't need a prefix byte, combine them
135
+ if ((pending_op_ == COPY_OP) && !diffs_.empty()) {
136
+ char c = diffs_[diffs_.size() - 1];
137
+ MapOp prior_op = static_cast<MapOp>(OpPart(c));
138
+ int prior_len = LenPart(c);
139
+ if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) {
140
+ diffs_[diffs_.size() - 1] += pending_length_;
141
+ pending_length_ = 0;
142
+ return;
143
+ }
144
+ }
145
+ if (pending_length_ > 0x3f) {
146
+ bool non_zero_emitted = false;
147
+ for (int shift = 30; shift > 0; shift -= 6) {
148
+ int prefix = (pending_length_ >> shift) & 0x3f;
149
+ if ((prefix > 0) || non_zero_emitted) {
150
+ Emit(PREFIX_OP, prefix);
151
+ non_zero_emitted = true;
152
+ }
153
+ }
154
+ }
155
+ Emit(pending_op_, pending_length_ & 0x3f);
156
+ pending_length_ = 0;
157
+ }
158
+
159
+
160
+ // Add one more entry to copy one byte off the end, then flush
161
+ void OffsetMap::FlushAll() {
162
+ Copy(1);
163
+ Flush();
164
+ }
165
+
166
+ // Flush all if necessary
167
+ void OffsetMap::MaybeFlushAll() {
168
+ if ((0 < pending_length_) || diffs_.empty()) {
169
+ FlushAll();
170
+ }
171
+ }
172
+
173
+ // Len may be 0, for example as the low piece of length=64
174
+ void OffsetMap::Emit(MapOp op, int len) {
175
+ char c = (static_cast<char>(op) << 6) | (len & 0x3f);
176
+ diffs_.push_back(c);
177
+ }
178
+
179
+ //----------------------------------------------------------------------------//
180
+ // The guts of the 2013 design //
181
+ // If there are three ranges a b c in diffs_, we can be in one of five //
182
+ // states: LEFT of a, in ranges a b c, or RIGHT of c //
183
+ // In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_ //
184
+ // position next_diff_sub_ //
185
+ // There also are mapping constants max_aoffset_ and max_aprimeoffset_ //
186
+ // If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0 //
187
+ // If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and //
188
+ // next_diff_sub_=diffs_.size() //
189
+ // Otherwise, at least one of A[) and A'[) is non-empty and the first bytes //
190
+ // correspond to each other. If range i is active, next_diff_sub_ is at //
191
+ // the first byte of range i+1. Because of the length-prefix operator, //
192
+ // an individual range item in diffs_ may be multiple bytes //
193
+ // In all cases aprimeoffset = aoffset + current_diff_ //
194
+ // i.e. current_diff_ = aprimeoffset - aoffset //
195
+ // //
196
+ // In the degenerate case of diffs_.empty(), there are only two states //
197
+ // LEFT and RIGHT and the mapping is the identity mapping. //
198
+ // The initial state is LEFT. //
199
+ // It is an error to move left into LEFT or right into RIGHT, but the code //
200
+ // below is robust in these cases. //
201
+ //----------------------------------------------------------------------------//
202
+
203
+ void OffsetMap::SetLeft() {
204
+ current_lo_aoffset_ = 0;
205
+ current_hi_aoffset_ = 0;
206
+ current_lo_aprimeoffset_ = 0;
207
+ current_hi_aprimeoffset_ = 0;
208
+ current_diff_ = 0;
209
+ next_diff_sub_ = 0;
210
+ }
211
+
212
+ void OffsetMap::SetRight() {
213
+ current_lo_aoffset_ = max_aoffset_;
214
+ current_hi_aoffset_ = max_aoffset_;
215
+ current_lo_aprimeoffset_ = max_aprimeoffset_;
216
+ current_hi_aprimeoffset_ = max_aprimeoffset_;
217
+ current_diff_ = max_aprimeoffset_ - max_aoffset_;
218
+ next_diff_sub_ = 0;
219
+ }
220
+
221
+ // Back up over previous range, 1..5 bytes
222
+ // Return subscript at the beginning of that. Pins at 0
223
+ int OffsetMap::Backup(int sub) {
224
+ if (sub <= 0) {return 0;}
225
+ --sub;
226
+ while ((0 < sub) &&
227
+ (static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) {
228
+ --sub;
229
+ }
230
+ return sub;
231
+ }
232
+
233
+ // Parse next range, 1..5 bytes
234
+ // Return subscript just off the end of that
235
+ int OffsetMap::ParseNext(int sub, MapOp* op, int* length) {
236
+ *op = PREFIX_OP;
237
+ *length = 0;
238
+ char c;
239
+ while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) {
240
+ c = diffs_[sub++];
241
+ *op = static_cast<MapOp>(OpPart(c));
242
+ int len = LenPart(c);
243
+ *length = (*length << 6) + len;
244
+ }
245
+ // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
246
+ // Mal-formed can include a trailing prefix byte with no following op
247
+ return sub;
248
+ }
249
+
250
+ // Parse previous range, 1..5 bytes
251
+ // Return current subscript
252
+ int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) {
253
+ sub = Backup(sub);
254
+ return ParseNext(sub, op, length);
255
+ }
256
+
257
+ // Move active window one range to the right
258
+ // Return true if move was OK
259
+ bool OffsetMap::MoveRight() {
260
+ // If at last range or RIGHT, set to RIGHT, return error
261
+ if (next_diff_sub_ >= static_cast<int>(diffs_.size())) {
262
+ SetRight();
263
+ return false;
264
+ }
265
+ // Actually OK to move right
266
+ MapOp op;
267
+ int length;
268
+ bool retval = true;
269
+ // If mal-formed or in RIGHT, this will return with op = PREFIX_OP
270
+ next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length);
271
+
272
+ current_lo_aoffset_ = current_hi_aoffset_;
273
+ current_lo_aprimeoffset_ = current_hi_aprimeoffset_;
274
+ if (op == COPY_OP) {
275
+ current_hi_aoffset_ = current_lo_aoffset_ + length;
276
+ current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
277
+ } else if (op == INSERT_OP) {
278
+ current_hi_aoffset_ = current_lo_aoffset_ + 0;
279
+ current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
280
+ } else if (op == DELETE_OP) {
281
+ current_hi_aoffset_ = current_lo_aoffset_ + length;
282
+ current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0;
283
+ } else {
284
+ SetRight();
285
+ retval = false;
286
+ }
287
+ current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
288
+ return retval;
289
+ }
290
+
291
+ // Move active window one range to the left
292
+ // Return true if move was OK
293
+ bool OffsetMap::MoveLeft() {
294
+ // If at first range or LEFT, set to LEFT, return error
295
+ if (next_diff_sub_ <= 0) {
296
+ SetLeft();
297
+ return false;
298
+ }
299
+ // Back up over current active window
300
+ next_diff_sub_ = Backup(next_diff_sub_);
301
+ if (next_diff_sub_ <= 0) {
302
+ SetLeft();
303
+ return false;
304
+ }
305
+ // Actually OK to move left
306
+ MapOp op;
307
+ int length;
308
+
309
+ // TODO(abakalov): 'retval' below is set but not used, which is suspicious.
310
+ // Did the authors mean to return this variable, analogously to MoveRight()?
311
+ // bool retval = true;
312
+ // If mal-formed or in LEFT, this will return with op = PREFIX_OP
313
+ next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length);
314
+
315
+ current_hi_aoffset_ = current_lo_aoffset_;
316
+ current_hi_aprimeoffset_ = current_lo_aprimeoffset_;
317
+ if (op == COPY_OP) {
318
+ current_lo_aoffset_ = current_hi_aoffset_ - length;
319
+ current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
320
+ } else if (op == INSERT_OP) {
321
+ current_lo_aoffset_ = current_hi_aoffset_ - 0;
322
+ current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
323
+ } else if (op == DELETE_OP) {
324
+ current_lo_aoffset_ = current_hi_aoffset_ - length;
325
+ current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0;
326
+ } else {
327
+ SetLeft();
328
+ // retval = false;
329
+ }
330
+ current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
331
+ return true;
332
+ }
333
+
334
+ // Map an offset in A' to the corresponding offset in A
335
+ int OffsetMap::MapBack(int aprimeoffset){
336
+ MaybeFlushAll();
337
+ if (aprimeoffset < 0) {return 0;}
338
+ if (max_aprimeoffset_ <= aprimeoffset) {
339
+ return (aprimeoffset - max_aprimeoffset_) + max_aoffset_;
340
+ }
341
+
342
+ // If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_,
343
+ // use current mapping, else move window left/right
344
+ bool ok = true;
345
+ while (ok && (aprimeoffset < current_lo_aprimeoffset_)) {
346
+ ok = MoveLeft();
347
+ }
348
+ while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) {
349
+ ok = MoveRight();
350
+ }
351
+ // So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_
352
+
353
+ int aoffset = aprimeoffset - current_diff_;
354
+ if (aoffset >= current_hi_aoffset_) {
355
+ // A' is in an insert region, all bytes of which backmap to A=hi_aoffset_
356
+ aoffset = current_hi_aoffset_;
357
+ }
358
+ return aoffset;
359
+ }
360
+
361
+ // Map an offset in A to the corresponding offset in A'
362
+ int OffsetMap::MapForward(int aoffset){
363
+ MaybeFlushAll();
364
+ if (aoffset < 0) {return 0;}
365
+ if (max_aoffset_ <= aoffset) {
366
+ return (aoffset - max_aoffset_) + max_aprimeoffset_;
367
+ }
368
+
369
+ // If current_lo_aoffset_ <= aoffset < current_hi_aoffset_,
370
+ // use current mapping, else move window left/right
371
+ bool ok = true;
372
+ while (ok && (aoffset < current_lo_aoffset_)) {
373
+ ok = MoveLeft();
374
+ }
375
+ while (ok && (current_hi_aoffset_ <= aoffset)) {
376
+ ok = MoveRight();
377
+ }
378
+
379
+ int aprimeoffset = aoffset + current_diff_;
380
+ if (aprimeoffset >= current_hi_aprimeoffset_) {
381
+ // A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_
382
+ aprimeoffset = current_hi_aprimeoffset_;
383
+ }
384
+ return aprimeoffset;
385
+ }
386
+
387
+
388
+ // static
389
+ bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) {
390
+ bool ok = true;
391
+ while (ok && (source->next_diff_sub_ !=
392
+ static_cast<int>(source->diffs_.size()))) {
393
+ ok = source->MoveRight();
394
+ if (source->current_lo_aoffset_ != source->current_hi_aoffset_) {
395
+ return false;
396
+ }
397
+ dest->Insert(
398
+ source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_);
399
+ }
400
+ return true;
401
+ }
402
+
403
+ // static
404
+ bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) {
405
+ bool ok = true;
406
+ while (ok && (source->next_diff_sub_ !=
407
+ static_cast<int>(source->diffs_.size()))) {
408
+ ok = source->MoveRight();
409
+ if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) {
410
+ return false;
411
+ }
412
+ dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_);
413
+ }
414
+ return true;
415
+ }
416
+
417
+ // static
418
+ void OffsetMap::ComposeOffsetMap(
419
+ OffsetMap* g, OffsetMap* f, OffsetMap* h) {
420
+ h->Clear();
421
+ f->Reset();
422
+ g->Reset();
423
+
424
+ int lo = 0;
425
+ for (;;) {
426
+ // Consume delete operations in f. This moves A without moving
427
+ // A' and A''.
428
+ if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) {
429
+ if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) {
430
+ // fprintf(stderr,
431
+ // "ComposeOffsetMap ERROR, f is longer than g.<br>\n");
432
+ }
433
+
434
+ // FlushAll(), called by Reset(), MapForward() or MapBack(), has
435
+ // added an extra COPY_OP to f and g, so this function has
436
+ // composed an extra COPY_OP in h from those. To avoid
437
+ // FlushAll() adds one more extra COPY_OP to h later, dispatch
438
+ // Flush() right now.
439
+ h->Flush();
440
+ return;
441
+ }
442
+
443
+ // Consume insert operations in g. This moves A'' without moving A
444
+ // and A'.
445
+ if (lo >= f->current_hi_aprimeoffset_) {
446
+ if (!CopyDeletes(f, h)) {
447
+ // fprintf(stderr,
448
+ // "ComposeOffsetMap ERROR, g is longer than f.<br>\n");
449
+ }
450
+ }
451
+
452
+ // Compose one operation which moves A' from lo to hi.
453
+ int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_);
454
+ if (f->current_lo_aoffset_ != f->current_hi_aoffset_ &&
455
+ g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
456
+ h->Copy(hi - lo);
457
+ } else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) {
458
+ h->Delete(hi - lo);
459
+ } else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
460
+ h->Insert(hi - lo);
461
+ }
462
+
463
+ lo = hi;
464
+ }
465
+ }
466
+
467
+ // For testing only -- force a mapping
468
+ void OffsetMap::StuffIt(const std::string& diffs,
469
+ int max_aoffset, int max_aprimeoffset) {
470
+ Clear();
471
+ diffs_ = diffs;
472
+ max_aoffset_ = max_aoffset;
473
+ max_aprimeoffset_ = max_aprimeoffset;
474
+ }
475
+
476
+
477
+ } // namespace CLD2
478
+ } // namespace chrome_lang_id