word-aligner 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CLAUDE.md ADDED
@@ -0,0 +1,111 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Purpose
6
+
7
+ **word-aligner** is an NPM library for handling word alignment logic between original-language Bible text (Greek, Hebrew, Aramaic) and target-language translations. It converts between USFM verse strings, verse object hierarchies, and structured alignment data used across unfoldingWord tools.
8
+
9
+ ## Commands
10
+
11
+ ```bash
12
+ # Install (legacy-peer-deps required due to usfm-js peer dependency)
13
+ npm i --legacy-peer-deps
14
+
15
+ # Run linting + all tests
16
+ npm test
17
+
18
+ # Run tests only (skip lint)
19
+ npx jest
20
+
21
+ # Run a single test file
22
+ npx jest __tests__/align.test.js
23
+
24
+ # Run tests matching a pattern
25
+ npx jest --testNamePattern "oneToOne"
26
+
27
+ # Lint only
28
+ npx eslint ./src
29
+
30
+ # Lint with auto-fix
31
+ npm run fix
32
+
33
+ # Compile src/ → lib/ (required before publishing)
34
+ npm run build
35
+
36
+ # Rebuild test fixture JSON files
37
+ npm run build:test-data
38
+ ```
39
+
40
+ ## Architecture
41
+
42
+ ### Entry point
43
+
44
+ `src/index.js` exports four things:
45
+ - `default` — the main aligner (`src/js/aligner.js`)
46
+ - `VerseObjectUtils` — verse object parsing helpers (`src/js/utils/verseObjects.js`)
47
+ - `MorphUtils` — morphological code parser for Greek/Hebrew/Aramaic (`src/js/utils/MorphUtils.js`)
48
+ - `ArrayUtils` — low-level array helpers (`src/js/utils/array.js`)
49
+
50
+ The compiled output lives in `lib/` (Babel 6, `babel-preset-es2015`). The `lib/` directory is what npm consumers receive; `src/` is the authoritative source.
51
+
52
+ ### Core module: `src/js/aligner.js`
53
+
54
+ The heart of the library. Two inverse operations:
55
+
56
+ - **`merge(alignments, wordBank, verseString, useVerseText)`** — Takes alignment data and a target verse, rebuilds a nested verse object hierarchy with alignment milestones.
57
+ - **`unmerge(verseObjects, alignedVerse)`** — Extracts structured `alignments` and `wordBank` arrays from verse objects.
58
+
59
+ Supporting functions `restoreVerseObjects()` and `restoreHierarchy()` handle the parent-child reconstruction after array manipulation. `combineConsecutiveText()` cleans up adjacent text nodes in the output. `wordMap` (an array of `{array, pos, parentIndex}` location records, one per word, where `array[pos]` is the slot holding that word) is used throughout to safely navigate and mutate deeply nested structures without losing position.
60
+
61
+ ### Key data shapes
62
+
63
+ **Alignment:**
64
+ ```js
65
+ {
66
+ topWords: [{ word, strong, lemma, morph, occurrence, occurrences }], // original language
67
+ bottomWords: [{ word, occurrence, occurrences }] // target language
68
+ }
69
+ ```
70
+
71
+ **VerseObject:**
72
+ ```js
73
+ {
74
+ tag: 'w' | 'k' | ...,
75
+ type: 'word' | 'text' | 'paragraph' | 'milestone',
76
+ text?: string,
77
+ children?: VerseObject[],
78
+ occurrence?: number,
79
+ occurrences?: number
80
+ }
81
+ ```
82
+
83
+ Milestones (`type: 'milestone'`) act as alignment containers; their `children` hold the target-language word objects that are aligned to the `topWords` stored on the milestone itself.
84
+
85
+ ### Tests and fixtures
86
+
87
+ Test files live in `__tests__/`. Fixture JSON and USFM files are in `__tests__/fixtures/pivotAlignmentVerseObjects/` — each fixture represents a real Bible verse edge case (nested milestones, punctuation, non-contiguous alignments, etc.).
88
+
89
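+ `scripts/BuildTestData.js` builds fixture JSON using `word-aligner-rcl`: it reads `template.json` from the fixtures directory, fills in any missing `verseObjects`, `alignment`/`wordBank`, and `verseString` (all derived from the template's `alignedVerseString`), and writes the result to `new-test-data.json` in the same directory. Run `npm run build:test-data` after updating `template.json`.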
90
+
91
+ ### Morphological parsing
92
+
93
+ `MorphUtils.js` decodes morph code strings (e.g., `Gr,V,,,,,AAN,,`) into localization key arrays using language-specific maps in `src/js/utils/morphCodeLocalizationMap.js`. It branches on Greek vs. Hebrew/Aramaic and handles sub-type disambiguation within each language family.
94
+
95
+ ## Dependencies to know
96
+
97
+ | Package | Role |
98
+ |---|---|
99
+ | `usfm-js` | Parses USFM Bible markup into verse object arrays |
100
+ | `string-punctuation-tokenizer` | Splits verse text, preserving punctuation as separate tokens |
101
+ | `lodash` | `cloneDeep` used when mutating verse object trees |
102
+ | `word-aligner-rcl` | React component library (dev only); used by `BuildTestData.js` |
103
+
104
+ ## Publish workflow
105
+
106
+ ```bash
107
+ npm i --legacy-peer-deps
108
+ npm run build # compiles src/ → lib/
109
+ npm publish # runs npm test + build automatically via prepublishOnly hook
110
+ # then auto-tags git: v$npm_package_version
111
+ ```
package/README.md CHANGED
@@ -1,2 +1,14 @@
1
1
  # word-aligner
2
2
  Logic for handling word alignment
3
+
4
+ ## Publish
5
+
6
+ ```bash
7
+ npm i --legacy-peer-deps && npm run build && npm publish
8
+ ```
9
+
10
+ ## Generate test fixture data
11
+ ```bash
12
+ # Rebuild test fixture JSON files
13
+ npm run build:test-data
14
+ ```
package/lib/js/aligner.js CHANGED
@@ -54,6 +54,134 @@ var hasAlignments = exports.hasAlignments = function hasAlignments(alignments) {
54
54
  return indexFirstAlignment >= 0;
55
55
  };
56
56
 
57
/**
 * Collapses every run of adjacent text verse objects into a single text object.
 * The first text object of a run is kept and the text of the following ones is
 * appended to it (the kept object is mutated). Any other object is passed through
 * unchanged, after its own `children` array (if any) has been collapsed the same way.
 * @param {Array} objects - verse objects to scan
 * @return {Array} - new array in which no two neighbours are both of type 'text'
 */
var combineConsecutiveText = function combineConsecutiveText(objects) {
  var merged = [];
  var index = 0;
  while (index < objects.length) {
    var entry = objects[index];
    index += 1;

    var previous = merged.length > 0 ? merged[merged.length - 1] : null;
    if (entry.type === 'text' && previous !== null && previous.type === 'text') {
      // extend the text object we already emitted instead of emitting another one
      previous.text += entry.text;
      continue;
    }

    // first object of a run, or not text at all: normalise its subtree, then keep it
    if (entry.children && Array.isArray(entry.children)) {
      entry.children = combineConsecutiveText(entry.children);
    }
    merged.push(entry);
  }
  return merged;
};
82
+
83
/**
 * Produces the final verse object list from the flattened working list:
 * re-nests entries under their parents, drops null/undefined entries, merges
 * adjacent text objects, and strips leftover `parentIndex` bookkeeping.
 * @param {Array} verseObjects - flattened verse objects (modified in place by the re-nesting step)
 * @return {Array} - new, cleaned array of verse objects
 */
function restoreVerseObjects(verseObjects) {
  // step 1: move flattened entries back under their parents (in place)
  restoreHierarchy(verseObjects);

  // step 2: keep only the entries that are actually present
  var present = [];
  for (var i = 0; i < verseObjects.length; i++) {
    var candidate = verseObjects[i];
    if (candidate !== null && candidate !== undefined) {
      present.push(candidate);
    }
  }

  // step 3: merge neighbouring text objects at every nesting level
  var combined = combineConsecutiveText(present);

  // step 4: wrap the list in a throw-away root so the top-level entries are cleaned too
  cleanChildReferences({ children: combined }, 'parentIndex');
  return combined;
}
100
+
101
/**
 * Walks all descendants of a verse object and deletes the given bookkeeping
 * property from each one whose value for it is `>= 0`. The object passed in is
 * not itself modified, only its children, grandchildren, and so on.
 * @param {Object} verseObject - root whose descendants should be cleaned
 * @param {string} [key='parentIndex'] - name of the property to delete
 */
function cleanChildReferences(verseObject) {
  var propertyName = arguments[1] === undefined ? 'parentIndex' : arguments[1];
  var descendants = verseObject.children || [];
  var total = descendants.length;
  var position = 0;

  while (position < total) {
    var node = descendants[position];
    position += 1;

    // only index-like values (>= 0) are treated as bookkeeping and removed
    if (node[propertyName] >= 0) {
      delete node[propertyName];
    }
    if (node.children) {
      cleanChildReferences(node, propertyName);
    }
  }
}
122
+
123
/**
 * Restores the hierarchical structure of flattened verse objects, in place.
 *
 * Every entry that carries a `parentIndex >= 0` is looked up against the entry whose
 * `originalIndex` (as recorded by `savePosition`) equals that value. When such a parent
 * exists and has a `children` array, the entry is appended to it and spliced out of the
 * top-level array. Entries whose parent cannot be resolved stay at the top level.
 * In both cases the `parentIndex`/`originalIndex` bookkeeping is removed afterwards.
 *
 * Null/undefined entries are tolerated and left where they are; the caller is expected
 * to filter them out.
 *
 * @param {Array} unalignedOrdered - Array of flattened verse objects that may contain parentIndex properties
 */
function restoreHierarchy(unalignedOrdered) {
  var toRemove = [];

  for (var i = 0, oLen = unalignedOrdered.length; i < oLen; i++) {
    var verseObject = unalignedOrdered[i];
    if (!verseObject) {
      continue; // empty slot: nothing to re-nest
    }

    var parentIndex = verseObject.parentIndex;
    if (!(parentIndex >= 0)) {
      continue; // top-level entry, stays where it is
    }

    var parent = null;
    for (var p = 0; p < oLen; p++) {
      var candidate = unalignedOrdered[p];
      if (candidate && candidate.originalIndex === parentIndex) {
        parent = candidate;
        break;
      }
    }

    if (parent && parent.children) {
      parent.children.push(verseObject);
      toRemove.push(i);
    }

    // NOTE(review): originalIndex is dropped here, so a later entry cannot resolve
    // this object as its parent — confirm nested flattening never relies on that.
    delete verseObject.parentIndex;
    delete verseObject.originalIndex;
    cleanChildReferences(verseObject, 'parentIndex');
  }

  // remove moved entries from the top level; walk backwards so earlier indices stay valid
  for (var r = toRemove.length - 1; r >= 0; r--) {
    unalignedOrdered.splice(toRemove[r], 1);
  }

  // clean up originalIndex bookkeeping on everything that is left
  for (var k = 0, remaining = unalignedOrdered.length; k < remaining; k++) {
    var leftover = unalignedOrdered[k];
    if (!leftover) {
      continue;
    }
    if (leftover.originalIndex >= 0) {
      delete leftover.originalIndex;
    }
    cleanChildReferences(leftover, 'originalIndex');
  }
}
169
+
170
/**
 * Stamps each present verse object with its current array position as `originalIndex`,
 * so the object can still be located by that position after later deletions shift the array.
 * Falsy entries (e.g. null) are left untouched.
 *
 * @param {Array} unalignedOrdered - Array of verse objects whose positions need to be saved
 */
function savePosition(unalignedOrdered) {
  unalignedOrdered.forEach(function (entry, position) {
    if (!entry) {
      return; // nothing to tag in an empty slot
    }
    entry.originalIndex = position;
  });
}
184
+
57
185
  /**
58
186
  * @description pivots alignments into bottomWords/targetLanguage verseObjectArray sorted by verseText
59
187
  * @param {Array} alignments - array of aligned word objects {bottomWords, topWords}
@@ -94,11 +222,11 @@ var merge = exports.merge = function merge(alignments, wordBank, verseString) {
94
222
  var wbLen = wordBank.length;
95
223
  for (var i = 0; i < wbLen; i++) {
96
224
  var bottomWord = wordBank[i];
97
- var verseObject = VerseObjectUtils.wordVerseObjectFromBottomWord(bottomWord);
98
- var index = VerseObjectUtils.indexOfVerseObject(wordMap, verseObject);
225
+ var _verseObject4 = VerseObjectUtils.wordVerseObjectFromBottomWord(bottomWord);
226
+ var index = VerseObjectUtils.indexOfVerseObject(wordMap, _verseObject4);
99
227
  if (index > -1) {
100
228
  var location = wordMap[index];
101
- location.array[location.pos] = verseObject;
229
+ location.array[location.pos] = _verseObject4;
102
230
  } else if (hasAlignments(alignments)) {
103
231
  // if verse has some alignments
104
232
  throw { message: 'Word "' + bottomWord.word + '" is in wordBank, but missing from target language verse.', type: 'InvalidatedAlignments' };
@@ -109,8 +237,8 @@ var merge = exports.merge = function merge(alignments, wordBank, verseString) {
109
237
  }
110
238
  var indicesToDelete = [];
111
239
  // each alignment should result in one verseObject
112
- for (var _i = 0, aLen = alignments.length; _i < aLen; _i++) {
113
- var alignment = alignments[_i];
240
+ for (var _i2 = 0, aLen = alignments.length; _i2 < aLen; _i2++) {
241
+ var alignment = alignments[_i2];
114
242
  var topWords = alignment.topWords,
115
243
  bottomWords = alignment.bottomWords;
116
244
  // each bottomWord results in a nested verseObject of tag: w, type: word
@@ -119,12 +247,12 @@ var merge = exports.merge = function merge(alignments, wordBank, verseString) {
119
247
  var replacements = {};
120
248
  for (var j = 0, bwLen = bottomWords.length; j < bwLen; j++) {
121
249
  var _bottomWord = bottomWords[j];
122
- var _verseObject2 = VerseObjectUtils.wordVerseObjectFromBottomWord(_bottomWord);
123
- var _index = VerseObjectUtils.indexOfVerseObject(wordMap, _verseObject2);
250
+ var _verseObject5 = VerseObjectUtils.wordVerseObjectFromBottomWord(_bottomWord);
251
+ var _index = VerseObjectUtils.indexOfVerseObject(wordMap, _verseObject5);
124
252
  if (_index === -1) {
125
- throw { message: 'VerseObject not found in verseText while merging:' + (0, _stringify2.default)(_verseObject2), type: 'InvalidatedAlignments' };
253
+ throw { message: 'VerseObject not found in verseText while merging:' + (0, _stringify2.default)(_verseObject5), type: 'InvalidatedAlignments' };
126
254
  }
127
- replacements[_index] = _verseObject2;
255
+ replacements[_index] = _verseObject5;
128
256
  }
129
257
  // each topWord results in a nested verseObject of tag: k, type: milestone
130
258
  var milestones = topWords.map(function (topWord) {
@@ -156,12 +284,19 @@ var merge = exports.merge = function merge(alignments, wordBank, verseString) {
156
284
  var milestone = VerseObjectUtils.nestMilestones(milestones);
157
285
  // replace the original verseObject from the verse text with the aligned milestone verseObject
158
286
  var _location = wordMap[indexToReplace];
287
+ if (_location.parentIndex >= 0) {
288
+ milestone.parentIndex = _location.parentIndex; // preserve the parent index
289
+ }
159
290
  _location.array[_location.pos] = milestone;
160
291
  }
161
292
  }
293
+
294
+ savePosition(unalignedOrdered); // save original position of each verseObject to keep track even after deletions
295
+
162
296
  // deleteIndices that were queued due to consecutive bottomWords in alignments
163
297
  var verseObjects = ArrayUtils.deleteIndices(unalignedOrdered, indicesToDelete, wordMap);
164
- return verseObjects;
298
+ var restoredObjects = restoreVerseObjects(verseObjects);
299
+ return restoredObjects;
165
300
  };
166
301
 
167
302
  /**
@@ -413,13 +548,13 @@ var unmerge = exports.unmerge = function unmerge(verseObjects, alignedVerse) {
413
548
  }
414
549
  var len = verseObjects.length;
415
550
  for (var i = 0; i < len; i++) {
416
- var verseObject = verseObjects[i];
417
- addAlignment(baseMilestones, verseObject, alignments);
551
+ var _verseObject6 = verseObjects[i];
552
+ addAlignment(baseMilestones, _verseObject6, alignments);
418
553
  }
419
554
  var alignmentUnOrdered = [];
420
555
  len = alignments.length;
421
- for (var _i2 = 0; _i2 < len; _i2++) {
422
- var _alignment = alignments[_i2];
556
+ for (var _i3 = 0; _i3 < len; _i3++) {
557
+ var _alignment = alignments[_i3];
423
558
  if (_alignment.topWords.length > 0) {
424
559
  alignmentUnOrdered.push(_alignment);
425
560
  } else {
@@ -185,21 +185,29 @@ var getVerseObjectsText = function getVerseObjectsText(verseObjects) {
185
185
  };
186
186
 
187
187
  /**
188
- * make sure we pick up white space between tokens
189
- * @param {string} text - string to tokenize
190
- * @param {Number} lastPos - position of end of last token
191
- * @param {Number} pos - position to grab up to
192
- * @param {Array} newVerseObjects - nested verse objects
193
- * @param {Boolean} end - if true, then at end of line
194
- * @return {{lastPos: *, verseObject: *}} - new verse object and updated position
188
+ * Fills gaps (whitespace and text) between tokens in the verse object array.
189
+ * Ensures whitespace between tokens is preserved by creating text verse objects.
190
+ * If possible, appends to the previous text object if it exists at the same nesting level;
191
+ * otherwise creates a new text verse object.
192
+ *
193
+ * @param {string} text - The complete string being tokenized
194
+ * @param {Number} lastPos - Position of the end of the last processed token
195
+ * @param {Number} pos - Position to process up to (start of next token or end of string)
196
+ * @param {Array} newVerseObjects - Array of verse objects being populated
197
+ * @param {Boolean} [end=false] - If true, forces creation of text object even if gap is empty (for end of line)
198
+ * @param {Number} [parentIndex=-1] - Index of parent verse object if nested, -1 if at root level
199
+ * @return {Number} Updated position after processing the gap (lastPos + gap.length)
195
200
  */
196
201
  var fillGap = function fillGap(text, lastPos, pos, newVerseObjects) {
197
202
  var end = arguments.length > 4 && arguments[4] !== undefined ? arguments[4] : false;
203
+ var parentIndex = arguments.length > 5 && arguments[5] !== undefined ? arguments[5] : -1;
198
204
 
199
205
  var verseObject = null;
200
206
  var gap = text.substring(lastPos, pos);
201
207
  var lastVerseObject = newVerseObjects.length && newVerseObjects[newVerseObjects.length - 1];
202
- if (lastVerseObject && lastVerseObject.type === 'text') {
208
+ var lastParentIndex = typeof lastVerseObject.parentIndex === 'number' ? lastVerseObject.parentIndex : -1;
209
+ var canAppendToPreviousText = lastVerseObject && lastVerseObject.type === 'text' && lastParentIndex === parentIndex;
210
+ if (canAppendToPreviousText) {
203
211
  // append to previous text
204
212
  lastVerseObject.text += gap;
205
213
  } else if (end || gap) {
@@ -208,6 +216,11 @@ var fillGap = function fillGap(text, lastPos, pos, newVerseObjects) {
208
216
  type: 'text',
209
217
  text: gap
210
218
  };
219
+
220
+ if (parentIndex >= 0) {
221
+ verseObject.parentIndex = parentIndex;
222
+ }
223
+
211
224
  newVerseObjects.push(verseObject);
212
225
  }
213
226
  lastPos += gap.length;
@@ -215,15 +228,22 @@ var fillGap = function fillGap(text, lastPos, pos, newVerseObjects) {
215
228
  };
216
229
 
217
230
  /**
218
- * parse text into tokens
219
- * @param {string} text - string to tokenize
220
- * @param {Array} newVerseObjects - nested verse objects
221
- * @param {Array} wordMap - ordered map of word locations in verseObjects
222
- * @param {Number} nonWordVerseObjectCount - keeps count of entries that are not actually words
223
- * @param {String} verseText - text of the entire verse
224
- * @return {Number} new nonWordVerseObjectCount
231
+ * Parses text into tokens and creates word or text verse objects.
232
+ * Tokenizes the input text and identifies words (containing word/number characters)
233
+ * versus punctuation/text. For words, creates word objects with occurrence tracking.
234
+ * For non-word tokens, creates text objects. Preserves whitespace between tokens.
235
+ *
236
+ * @param {string} text - The string to tokenize
237
+ * @param {Array} newVerseObjects - Array to populate with newly created verse objects
238
+ * @param {Array} wordMap - Ordered map tracking word locations in verseObjects for occurrence counting
239
+ * @param {Number} nonWordVerseObjectCount - Counter for entries that are not words (text/punctuation)
240
+ * @param {String} verseText - Complete text of the entire verse for occurrence calculation
241
+ * @param {Number} [parentIndex=-1] - Index of parent verse object if this text is nested, -1 if at root level
242
+ * @return {Number} Updated nonWordVerseObjectCount after processing
225
243
  */
226
244
  var tokenizeText = function tokenizeText(text, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText) {
245
+ var parentIndex = arguments.length > 5 && arguments[5] !== undefined ? arguments[5] : -1;
246
+
227
247
  if (text) {
228
248
  var tokens = tokenizer.tokenize({ text: text, includePunctuation: true });
229
249
  var tokenLength = tokens.length;
@@ -234,7 +254,7 @@ var tokenizeText = function tokenizeText(text, newVerseObjects, wordMap, nonWord
234
254
  var pos = text.indexOf(word, lastPos);
235
255
  if (pos > lastPos) {
236
256
  // make sure we are not dropping white space
237
- lastPos = fillGap(text, lastPos, pos, newVerseObjects);
257
+ lastPos = fillGap(text, lastPos, pos, newVerseObjects, false, parentIndex);
238
258
  }
239
259
  if (tokenizer.word.test(word) || tokenizer.number.test(word)) {
240
260
  // if the text has word or number characters, its a word object
@@ -249,7 +269,8 @@ var tokenizeText = function tokenizeText(text, newVerseObjects, wordMap, nonWord
249
269
  occurrence: occurrence,
250
270
  occurrences: occurrences
251
271
  };
252
- wordMap.push({ array: newVerseObjects, pos: newVerseObjects.length });
272
+ var _pos = newVerseObjects.length;
273
+ wordMap.push({ array: newVerseObjects, pos: _pos, parentIndex: parentIndex });
253
274
  } else {
254
275
  // the text does not have word characters
255
276
  nonWordVerseObjectCount++;
@@ -259,28 +280,46 @@ var tokenizeText = function tokenizeText(text, newVerseObjects, wordMap, nonWord
259
280
  };
260
281
  }
261
282
  lastPos += word.length;
283
+
284
+ if (parentIndex >= 0) {
285
+ verseObject.parentIndex = parentIndex;
286
+ }
287
+
262
288
  newVerseObjects.push(verseObject);
263
289
  }
264
290
  if (lastPos < text.length) {
265
- lastPos = fillGap(text, lastPos, text.length, newVerseObjects, true);
291
+ lastPos = fillGap(text, lastPos, text.length, newVerseObjects, true, parentIndex);
266
292
  }
267
293
  }
268
294
  return nonWordVerseObjectCount;
269
295
  };
270
296
 
271
297
  /**
272
- * step through verse objects extracting words
273
- * @param {Array} verseObjects - original array of verse objects with words split
274
- * @param {Array} newVerseObjects - new array of verse objects with words split
275
- * @param {Array} wordMap - ordered map of word locations in verseObjects
276
- * @param {String} verseText - text of the entire verse
277
- * @param {Number} nonWordVerseObjectCount - keeps count of entries that are not actually words
278
- * @return {Number} updated nonWordVerseObjectCount
298
+ * Recursively processes nested verse objects to extract and tokenize words.
299
+ * Traverses through verse objects, preserving non-text objects (like milestones) while
300
+ * extracting and tokenizing any text content. Handles nested children recursively.
301
+ * Maintains parent-child relationships through parentIndex tracking.
302
+ *
303
+ * @param {Array} verseObjects - Original array of verse objects to process (may contain nested structures)
304
+ * @param {Array} newVerseObjects - Output array to populate with processed verse objects with words split
305
+ * @param {Array} wordMap - Ordered map tracking word locations in verseObjects for occurrence counting
306
+ * @param {String} verseText - Complete text of the entire verse for occurrence calculation
307
+ * @param {Number} nonWordVerseObjectCount - Counter for entries that are not words (text/punctuation)
308
+ * @param {Number} [parentIndex=-1] - Index of parent verse object for nested elements, -1 if at root level
309
+ * @return {Number} Updated nonWordVerseObjectCount after processing all verse objects
279
310
  */
280
311
  var getWordsFromNestedVerseObjects = function getWordsFromNestedVerseObjects(verseObjects, newVerseObjects, wordMap, verseText, nonWordVerseObjectCount) {
312
+ var parentIndex = arguments.length > 5 && arguments[5] !== undefined ? arguments[5] : -1;
313
+
281
314
  var voLength = verseObjects.length;
282
315
  for (var i = 0; i < voLength; i++) {
283
316
  var verseObject = verseObjects[i];
317
+
318
+ if (parentIndex >= 0) {
319
+ // keep track of where the parent is
320
+ verseObject.parentIndex = parentIndex;
321
+ }
322
+
284
323
  var vsObjText = verseObject.text;
285
324
  if (verseObject.type !== 'text') {
286
325
  // preseserve non-text verseObject except for text part which will be split into words
@@ -292,16 +331,17 @@ var getWordsFromNestedVerseObjects = function getWordsFromNestedVerseObjects(ver
292
331
  verseObject.nextChar = ' '; // preserve space before text
293
332
  }
294
333
  newVerseObjects.push(verseObject);
334
+ var indexOfThisObject = newVerseObjects.length - 1;
295
335
  if (verseObject.children) {
296
336
  var newChildVerseObjects = [];
297
- nonWordVerseObjectCount = tokenizeText(vsObjText, newChildVerseObjects, wordMap, nonWordVerseObjectCount, verseText);
298
- nonWordVerseObjectCount = getWordsFromNestedVerseObjects(verseObject.children, newChildVerseObjects, wordMap, verseText, nonWordVerseObjectCount);
337
+ nonWordVerseObjectCount = tokenizeText(vsObjText, newChildVerseObjects, wordMap, nonWordVerseObjectCount, verseText, indexOfThisObject);
338
+ nonWordVerseObjectCount = getWordsFromNestedVerseObjects(verseObject.children, newChildVerseObjects, wordMap, verseText, nonWordVerseObjectCount, indexOfThisObject);
299
339
  verseObject.children = newChildVerseObjects;
300
340
  } else {
301
- nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText);
341
+ nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText, indexOfThisObject);
302
342
  }
303
343
  } else {
304
- nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText);
344
+ nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText, parentIndex);
305
345
  }
306
346
  }
307
347
  return nonWordVerseObjectCount;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "word-aligner",
3
- "version": "1.0.3",
3
+ "version": "1.1.0",
4
4
  "description": "A library for handling word alignment",
5
5
  "main": "lib/index.js",
6
6
  "scripts": {
@@ -9,6 +9,8 @@
9
9
  "compile": "rimraf lib && babel src/ -d lib/",
10
10
  "prebuild": "rm -rf ./lib",
11
11
  "build": "babel ./src -d ./lib",
12
+ "build:test-data": "node scripts/BuildTestData.js",
13
+ "debug:test-data": "node --inspect-brk scripts/BuildTestData.js",
12
14
  "prepare": "if [ ! -d './lib/' ]; then npm run build; fi",
13
15
  "prepublishOnly": "npm test && npm run build",
14
16
  "postpublish": "git tag v$npm_package_version && git push origin v$npm_package_version"
@@ -38,7 +40,7 @@
38
40
  },
39
41
  "homepage": "https://github.com/unfoldingWord/word-aligner#readme",
40
42
  "peerDependencies": {
41
- "usfm-js": "^2.1.0"
43
+ "usfm-js": "^3.5.0"
42
44
  },
43
45
  "devDependencies": {
44
46
  "babel-cli": "^6.26.0",
@@ -48,6 +50,7 @@
48
50
  "babel-plugin-transform-runtime": "^6.23.0",
49
51
  "babel-preset-env": "^1.7.0",
50
52
  "babel-preset-es2015": "^6.24.1",
53
+ "deep-equal": "1.1.2",
51
54
  "eslint": "^5.12.1",
52
55
  "eslint-config-google": "^0.12.0",
53
56
  "eslint-plugin-jest": "^22.1.3",
@@ -58,7 +61,8 @@
58
61
  "path": "0.12.7",
59
62
  "path-extra": "^4.2.1",
60
63
  "rimraf": "^2.6.2",
61
- "usfm-js": "2.1.0"
64
+ "usfm-js": "3.5.0",
65
+ "word-aligner-rcl": "1.3.7-beta.6"
62
66
  },
63
67
  "dependencies": {
64
68
  "babel-runtime": "^6.26.0",
@@ -0,0 +1,117 @@
1
+ const path = require('path');
2
+ const fs = require('fs-extra');
3
+ const {AlignmentHelpers, UsfmFileConversionHelpers, usfmHelpers} = require('word-aligner-rcl');
4
+
5
+ const RESOURCES = path.join('__tests__', 'fixtures', 'pivotAlignmentVerseObjects');
6
+ // const folder = fs.readdirSync(RESOURCES);
7
+ // console.log(folder);
8
+
9
/**
 * Loads a JSON file from the fixtures directory (RESOURCES).
 * @param {string} filename - path of the JSON file, relative to RESOURCES
 * @return {Object|boolean} - the parsed JSON content, or false when the file does not exist
 */
const readJSON = filename => {
  const fullPath = path.join(RESOURCES, filename);

  if (!fs.existsSync(fullPath)) {
    console.log('File not found.');
    return false;
  }
  return fs.readJsonSync(fullPath);
};
23
+
24
/**
 * Serialises an object as JSON (2-space indentation) into the fixtures directory (RESOURCES).
 * @param {string} filename - path of the output file, relative to RESOURCES
 * @param {Object} json - the data to write
 */
const writeJSON = (filename, json) => {
  const options = {spaces: 2};
  fs.writeJsonSync(path.join(RESOURCES, filename), json, options);
};
33
+
34
/**
 * Normalizes occurrence data throughout a verse object tree, in place.
 * Every entry with a truthy `occurrence` is passed to
 * `AlignmentHelpers.convertOccurrencesInWord` and, when that returns a truthy value,
 * replaced by the result. Entries with `children` are processed recursively.
 * Anything that is not an array is ignored.
 *
 * Note: recursion follows the children of the entry as it was BEFORE replacement.
 *
 * @param {Array} verseObjects - verse objects to normalize (the array itself is modified)
 */
function normalizeVerseObjects(verseObjects) {
  if (!Array.isArray(verseObjects)) {
    return;
  }

  for (const [index, item] of verseObjects.entries()) {
    if (item.occurrence) {
      const converted = AlignmentHelpers.convertOccurrencesInWord(item);
      if (converted) {
        verseObjects[index] = converted;
      }
    }

    if (item.children) {
      normalizeVerseObjects(item.children);
    }
  }
}
57
+
58
/**
 * Parses an aligned USFM verse string into verse objects and normalizes
 * their occurrence data before returning them.
 * @param {string} alignedVerseString - the aligned USFM verse text to parse
 * @return {Array} - the normalized verse objects
 */
function getVerseObjects(alignedVerseString) {
  const parsed = usfmHelpers.usfmVerseToJson(alignedVerseString);
  // normalization happens in place on the parsed result
  normalizeVerseObjects(parsed);
  return parsed;
}
68
+
69
/**
 * Builds test data from `template.json` in the fixtures directory and writes the
 * completed record to `new-test-data.json`.
 *
 * Fields are only generated when the template has an `alignedVerseString`:
 * - `verseObjects` is generated from it when missing or empty
 * - `alignment` and `wordBank` are generated from it when `alignment` is missing or empty
 * - `verseString` is generated from `verseObjects` when missing or empty
 *
 * The template's `comment` field is logged and then removed from the output.
 * Nothing is written when the template file cannot be read.
 */
function buildTestData() {
  const testData = readJSON('template.json');

  if (!testData) {
    // readJSON already reported the missing file; do not emit `false` as fixture data
    return;
  }
  console.log(testData.comment);

  // a missing field is treated the same as an empty one
  const hasEntries = value => Array.isArray(value) && value.length > 0;
  const {alignedVerseString} = testData;

  if (alignedVerseString) {
    console.log(`found alignedVerseString: ${alignedVerseString.length} chars`);

    if (!hasEntries(testData.verseObjects)) {
      console.log('Generating verseObjects from alignedVerseString');
      const verseObjects = getVerseObjects(alignedVerseString);

      if (verseObjects) {
        testData.verseObjects = verseObjects;
      }
    }
  }

  if (alignedVerseString && hasEntries(testData.verseObjects)) {
    if (!hasEntries(testData.alignment)) {
      console.log('Generating alignment and wordBank from alignedVerseString');
      // NOTE(review): the result of this call is discarded; kept in case it validates
      // the input or has side effects — confirm against word-aligner-rcl and remove if not.
      AlignmentHelpers.extractAlignmentsFromTargetVerse(alignedVerseString);
      const {
        targetWords: wordBank,
        verseAlignments: alignments,
      } = AlignmentHelpers.parseUsfmToWordAlignerData(alignedVerseString, null);
      const cleanedAlignments = AlignmentHelpers.getCleanedAlignments(wordBank, alignments);
      testData.alignment = cleanedAlignments.alignments;
      testData.wordBank = cleanedAlignments.wordBank;
    }

    if (!testData.verseString) {
      console.log('Generating verseString from verseObjects');
      const verseString = UsfmFileConversionHelpers.getUsfmForVerseContent(testData.verseObjects);
      console.log(`verseString: ${verseString}`);
      testData.verseString = verseString;
    }
  }

  delete testData.comment;
  console.log('New test data:', testData);
  writeJSON('new-test-data.json', testData);
}
117
+ buildTestData();
package/src/js/aligner.js CHANGED
@@ -20,6 +20,124 @@ export const hasAlignments = (alignments) => {
20
20
  return indexFirstAlignment >= 0;
21
21
  };
22
22
 
/**
 * Combines consecutive text objects in an array of verse objects recursively.
 * When multiple text objects appear consecutively, they are merged into the
 * first one (which is modified in place). Nested `children` arrays are
 * processed the same way and replaced on their owning object.
 * Null/undefined entries are dropped at every nesting level, so text objects
 * separated only by such an entry are treated as consecutive.
 * @param {Array} objects - Array of verse objects to process
 * @return {Array} - New array with consecutive text objects combined
 */
const combineConsecutiveText = (objects) => {
  // a text object without a `text` value contributes nothing when merged
  const textOrEmpty = value => ((value === undefined || value === null) ? '' : value);
  const result = [];

  for (let i = 0; i < objects.length; i++) {
    const current = objects[i];

    if (current === null || current === undefined) {
      continue; // placeholder left by an earlier deletion - nothing to keep
    }

    const previous = result.length > 0 ? result[result.length - 1] : null;

    if (current.type === 'text' && previous && previous.type === 'text') {
      // combine with previous text object
      previous.text = textOrEmpty(previous.text) + textOrEmpty(current.text);
    } else {
      // recursively process children if they exist
      if (current.children && Array.isArray(current.children)) {
        current.children = combineConsecutiveText(current.children);
      }
      result.push(current);
    }
  }
  return result;
};
48
+
/**
 * Restores verse objects from a flattened state: removes null/undefined
 * entries, rebuilds the parent/child hierarchy, combines consecutive text
 * objects, and strips leftover `parentIndex` bookkeeping from the result.
 *
 * Null/undefined entries are removed before the hierarchy is rebuilt, so that
 * step never has to dereference a placeholder. The rebuild works on the
 * filtered copy; the array passed in is not resized, although the verse
 * objects it contains are modified.
 * @param {Array} verseObjects - Array of verse objects to restore
 * @return {Array} - Cleaned and restored array of verse objects
 */
function restoreVerseObjects(verseObjects) {
  // remove null objects first - parents are matched by originalIndex, not by
  // array position, so filtering does not disturb the hierarchy lookup
  const filteredObjects = verseObjects.filter(item => item !== null && item !== undefined);
  restoreHierarchy(filteredObjects);
  // combine consecutive text objects in nested verseObjects
  const cleanedVerseObjects = combineConsecutiveText(filteredObjects);
  // wrap the list in a temporary root so every top-level entry is cleaned as a child
  cleanChildReferences({children: cleanedVerseObjects}, 'parentIndex');
  return cleanedVerseObjects;
}
64
+
/**
 * Recursively removes a bookkeeping property (default 'parentIndex') from all
 * descendants of a verse object. The property is only removed from a child
 * when its value compares `>= 0`; other values are left untouched.
 * The property on `verseObject` itself is never removed - only on its children.
 * A null/undefined `verseObject`, and null/undefined children, are skipped.
 * @param {Object} verseObject - The verse object whose children should be cleaned
 * @param {string} [key='parentIndex'] - The property key to remove from children
 */
function cleanChildReferences(verseObject, key = 'parentIndex') {
  const children = (verseObject && verseObject.children) || [];
  for (let j = 0, cLen = children.length; j < cLen; j++) {
    const child = children[j];
    if (!child) {
      continue; // placeholder entry - nothing to clean
    }
    if (child[key] >= 0) {
      delete child[key];
    }
    if (child.children) {
      cleanChildReferences(child, key);
    }
  }
}
84
+
/**
 * Restores the hierarchical structure of flattened verse objects, in place.
 * A verse object whose `parentIndex` compares `>= 0` is appended to the
 * `children` array of the entry whose `originalIndex` equals that value, and
 * is then spliced out of the top-level array. If no such parent is found, or
 * the parent has no `children`, the object stays where it is.
 * Either way its `parentIndex`/`originalIndex` bookkeeping is removed, and
 * remaining `originalIndex` values are stripped at the end.
 * Null/undefined entries are left in place and skipped.
 *
 * @param {Array} unalignedOrdered - Array of flattened verse objects that may contain parentIndex properties
 */
function restoreHierarchy(unalignedOrdered) {
  const toRemove = [];

  for (let i = 0, oLen = unalignedOrdered.length; i < oLen; i++) {
    const verseObject = unalignedOrdered[i];
    if (!verseObject) {
      continue; // placeholder entry - has no parent to be restored to
    }
    const parentIndex = verseObject.parentIndex;
    if (parentIndex >= 0) {
      const parent = unalignedOrdered.find(obj => obj && obj.originalIndex === parentIndex);
      if (parent && parent.children) {
        parent.children.push(verseObject);
        toRemove.push(i);
      }
      delete verseObject.parentIndex;
      delete verseObject.originalIndex;
      cleanChildReferences(verseObject, 'parentIndex');
    }
  }

  // remove moved objects from their original location; go back to front so
  // the indices collected above stay valid while splicing
  for (let i = toRemove.length - 1; i >= 0; i--) {
    const toRemoveElement = toRemove[i];
    unalignedOrdered.splice(toRemoveElement, 1);
  }

  // clean up originalIndex property
  for (let i = 0, oLen = unalignedOrdered.length; i < oLen; i++) {
    const verseObject = unalignedOrdered[i];
    if (!verseObject) {
      continue;
    }
    if (verseObject.originalIndex >= 0) {
      delete verseObject.originalIndex;
    }
    cleanChildReferences(verseObject, 'originalIndex');
  }
}
125
+
/**
 * Records where each verse object currently sits in the array by writing its
 * index into an `originalIndex` property on the object.
 * Falsy entries (e.g. null/undefined) are skipped. The recorded index lets an
 * object's former position be identified after later deletions.
 *
 * @param {Array} unalignedOrdered - Array of verse objects whose positions need to be saved
 */
function savePosition(unalignedOrdered) {
  unalignedOrdered.forEach((entry, position) => {
    if (!entry) {
      return;
    }
    entry.originalIndex = position; // remember the slot this object occupied
  });
}
140
+
23
141
  /**
24
142
  * @description pivots alignments into bottomWords/targetLanguage verseObjectArray sorted by verseText
25
143
  * @param {Array} alignments - array of aligned word objects {bottomWords, topWords}
@@ -113,12 +231,19 @@ export const merge = (alignments, wordBank, verseString,
113
231
  const milestone = VerseObjectUtils.nestMilestones(milestones);
114
232
  // replace the original verseObject from the verse text with the aligned milestone verseObject
115
233
  const location = wordMap[indexToReplace];
234
+ if (location.parentIndex >= 0) {
235
+ milestone.parentIndex = location.parentIndex; // preserve the parent index
236
+ }
116
237
  location.array[location.pos] = milestone;
117
238
  }
118
239
  }
240
+
241
+ savePosition(unalignedOrdered); // save original position of each verseObject to keep track even after deletions
242
+
119
243
  // deleteIndices that were queued due to consecutive bottomWords in alignments
120
244
  const verseObjects = ArrayUtils.deleteIndices(unalignedOrdered, indicesToDelete, wordMap);
121
- return verseObjects;
245
+ const restoredObjects = restoreVerseObjects(verseObjects);
246
+ return restoredObjects;
122
247
  };
123
248
 
124
249
  /**
@@ -129,25 +129,38 @@ const getVerseObjectsText = (verseObjects) => {
129
129
  };
130
130
 
131
131
  /**
132
- * make sure we pick up white space between tokens
133
- * @param {string} text - string to tokenize
134
- * @param {Number} lastPos - position of end of last token
135
- * @param {Number} pos - position to grab up to
136
- * @param {Array} newVerseObjects - nested verse objects
137
- * @param {Boolean} end - if true, then at end of line
138
- * @return {{lastPos: *, verseObject: *}} - new verse object and updated position
132
+ * Fills gaps (whitespace and text) between tokens in the verse object array.
133
+ * Ensures whitespace between tokens is preserved by creating text verse objects.
134
+ * If possible, appends to the previous text object if it exists at the same nesting level;
135
+ * otherwise creates a new text verse object.
136
+ *
137
+ * @param {string} text - The complete string being tokenized
138
+ * @param {Number} lastPos - Position of the end of the last processed token
139
+ * @param {Number} pos - Position to process up to (start of next token or end of string)
140
+ * @param {Array} newVerseObjects - Array of verse objects being populated
141
+ * @param {Boolean} [end=false] - If true, forces creation of text object even if gap is empty (for end of line)
142
+ * @param {Number} [parentIndex=-1] - Index of parent verse object if nested, -1 if at root level
143
+ * @return {Number} Updated position after processing the gap (lastPos + gap.length)
139
144
  */
140
- const fillGap = (text, lastPos, pos, newVerseObjects, end = false) => {
145
+ const fillGap = (text, lastPos, pos, newVerseObjects, end = false, parentIndex = -1) => {
141
146
  let verseObject = null;
142
147
  const gap = text.substring(lastPos, pos);
143
148
  const lastVerseObject = newVerseObjects.length && newVerseObjects[newVerseObjects.length - 1];
144
- if (lastVerseObject && (lastVerseObject.type === 'text')) { // append to previous text
149
+ const lastParentIndex = (typeof lastVerseObject.parentIndex === 'number') ? lastVerseObject.parentIndex : -1;
150
+ const canAppendToPreviousText = lastVerseObject && (lastVerseObject.type === 'text')
151
+ && (lastParentIndex === parentIndex);
152
+ if (canAppendToPreviousText) { // append to previous text
145
153
  lastVerseObject.text += gap;
146
154
  } else if (end || gap) { // save gap
147
155
  verseObject = {
148
156
  type: 'text',
149
157
  text: gap,
150
158
  };
159
+
160
+ if (parentIndex >= 0) {
161
+ verseObject.parentIndex = parentIndex;
162
+ }
163
+
151
164
  newVerseObjects.push(verseObject);
152
165
  }
153
166
  lastPos += gap.length;
@@ -155,15 +168,20 @@ const fillGap = (text, lastPos, pos, newVerseObjects, end = false) => {
155
168
  };
156
169
 
157
170
  /**
158
- * parse text into tokens
159
- * @param {string} text - string to tokenize
160
- * @param {Array} newVerseObjects - nested verse objects
161
- * @param {Array} wordMap - ordered map of word locations in verseObjects
162
- * @param {Number} nonWordVerseObjectCount - keeps count of entries that are not actually words
163
- * @param {String} verseText - text of the entire verse
164
- * @return {Number} new nonWordVerseObjectCount
171
+ * Parses text into tokens and creates word or text verse objects.
172
+ * Tokenizes the input text and identifies words (containing word/number characters)
173
+ * versus punctuation/text. For words, creates word objects with occurrence tracking.
174
+ * For non-word tokens, creates text objects. Preserves whitespace between tokens.
175
+ *
176
+ * @param {string} text - The string to tokenize
177
+ * @param {Array} newVerseObjects - Array to populate with newly created verse objects
178
+ * @param {Array} wordMap - Ordered map tracking word locations in verseObjects for occurrence counting
179
+ * @param {Number} nonWordVerseObjectCount - Counter for entries that are not words (text/punctuation)
180
+ * @param {String} verseText - Complete text of the entire verse for occurrence calculation
181
+ * @param {Number} [parentIndex=-1] - Index of parent verse object if this text is nested, -1 if at root level
182
+ * @return {Number} Updated nonWordVerseObjectCount after processing
165
183
  */
166
- const tokenizeText = (text, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText) => {
184
+ const tokenizeText = (text, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText, parentIndex = -1) => {
167
185
  if (text) {
168
186
  const tokens = tokenizer.tokenize({text, includePunctuation: true});
169
187
  const tokenLength = tokens.length;
@@ -173,7 +191,7 @@ const tokenizeText = (text, newVerseObjects, wordMap, nonWordVerseObjectCount, v
173
191
  const word = tokens[j];
174
192
  const pos = text.indexOf(word, lastPos);
175
193
  if (pos > lastPos) { // make sure we are not dropping white space
176
- lastPos = fillGap(text, lastPos, pos, newVerseObjects);
194
+ lastPos = fillGap(text, lastPos, pos, newVerseObjects, false, parentIndex);
177
195
  }
178
196
  if (tokenizer.word.test(word) || tokenizer.number.test(word)) { // if the text has word or number characters, its a word object
179
197
  const wordIndex = wordMap.length;
@@ -192,7 +210,8 @@ const tokenizeText = (text, newVerseObjects, wordMap, nonWordVerseObjectCount, v
192
210
  occurrence,
193
211
  occurrences,
194
212
  };
195
- wordMap.push({array: newVerseObjects, pos: newVerseObjects.length});
213
+ const pos = newVerseObjects.length;
214
+ wordMap.push({array: newVerseObjects, pos, parentIndex});
196
215
  } else { // the text does not have word characters
197
216
  nonWordVerseObjectCount++;
198
217
  verseObject = {
@@ -201,28 +220,50 @@ const tokenizeText = (text, newVerseObjects, wordMap, nonWordVerseObjectCount, v
201
220
  };
202
221
  }
203
222
  lastPos += word.length;
223
+
224
+ if (parentIndex >= 0) {
225
+ verseObject.parentIndex = parentIndex;
226
+ }
227
+
204
228
  newVerseObjects.push(verseObject);
205
229
  }
206
230
  if (lastPos < text.length) {
207
- lastPos = fillGap(text, lastPos, text.length, newVerseObjects, true);
231
+ lastPos = fillGap(text, lastPos, text.length, newVerseObjects, true, parentIndex);
208
232
  }
209
233
  }
210
234
  return nonWordVerseObjectCount;
211
235
  };
212
236
 
213
237
  /**
214
- * step through verse objects extracting words
215
- * @param {Array} verseObjects - original array of verse objects with words split
216
- * @param {Array} newVerseObjects - new array of verse objects with words split
217
- * @param {Array} wordMap - ordered map of word locations in verseObjects
218
- * @param {String} verseText - text of the entire verse
219
- * @param {Number} nonWordVerseObjectCount - keeps count of entries that are not actually words
220
- * @return {Number} updated nonWordVerseObjectCount
238
+ * Recursively processes nested verse objects to extract and tokenize words.
239
+ * Traverses through verse objects, preserving non-text objects (like milestones) while
240
+ * extracting and tokenizing any text content. Handles nested children recursively.
241
+ * Maintains parent-child relationships through parentIndex tracking.
242
+ *
243
+ * @param {Array} verseObjects - Original array of verse objects to process (may contain nested structures)
244
+ * @param {Array} newVerseObjects - Output array to populate with processed verse objects with words split
245
+ * @param {Array} wordMap - Ordered map tracking word locations in verseObjects for occurrence counting
246
+ * @param {String} verseText - Complete text of the entire verse for occurrence calculation
247
+ * @param {Number} nonWordVerseObjectCount - Counter for entries that are not words (text/punctuation)
248
+ * @param {Number} [parentIndex=-1] - Index of parent verse object for nested elements, -1 if at root level
249
+ * @return {Number} Updated nonWordVerseObjectCount after processing all verse objects
221
250
  */
222
- const getWordsFromNestedVerseObjects = (verseObjects, newVerseObjects, wordMap, verseText, nonWordVerseObjectCount) => {
251
+ const getWordsFromNestedVerseObjects = (
252
+ verseObjects,
253
+ newVerseObjects,
254
+ wordMap,
255
+ verseText,
256
+ nonWordVerseObjectCount,
257
+ parentIndex = -1
258
+ ) => {
223
259
  const voLength = verseObjects.length;
224
260
  for (let i = 0; i < voLength; i++) {
225
261
  const verseObject = verseObjects[i];
262
+
263
+ if (parentIndex >= 0) { // keep track of where the parent is
264
+ verseObject.parentIndex = parentIndex;
265
+ }
266
+
226
267
  let vsObjText = verseObject.text;
227
268
  if ((verseObject.type !== 'text')) {
228
269
  // preserve non-text verseObject except for text part which will be split into words
@@ -234,17 +275,19 @@ const getWordsFromNestedVerseObjects = (verseObjects, newVerseObjects, wordMap,
234
275
  verseObject.nextChar = ' '; // preserve space before text
235
276
  }
236
277
  newVerseObjects.push(verseObject);
278
+ const indexOfThisObject = newVerseObjects.length - 1;
237
279
  if (verseObject.children) {
238
280
  const newChildVerseObjects = [];
239
- nonWordVerseObjectCount = tokenizeText(vsObjText, newChildVerseObjects, wordMap, nonWordVerseObjectCount, verseText);
281
+ nonWordVerseObjectCount = tokenizeText(vsObjText, newChildVerseObjects, wordMap, nonWordVerseObjectCount, verseText, indexOfThisObject);
240
282
  nonWordVerseObjectCount = getWordsFromNestedVerseObjects(verseObject.children, newChildVerseObjects,
241
- wordMap, verseText, nonWordVerseObjectCount);
283
+ wordMap, verseText, nonWordVerseObjectCount,
284
+ indexOfThisObject);
242
285
  verseObject.children = newChildVerseObjects;
243
286
  } else {
244
- nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText);
287
+ nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText, indexOfThisObject);
245
288
  }
246
289
  } else {
247
- nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText);
290
+ nonWordVerseObjectCount = tokenizeText(vsObjText, newVerseObjects, wordMap, nonWordVerseObjectCount, verseText, parentIndex);
248
291
  }
249
292
  }
250
293
  return nonWordVerseObjectCount;