node-sword-interface 1.0.105 → 1.0.106
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
// Benchmark script for performance profiling
// Run with: node --prof scripts/benchmark.js
// Then process: node --prof-process isolate-*.log > profile.txt
const NodeSwordInterface = require('../index.js');

/**
 * Measures how long getBibleText('KJV') takes with markup enabled.
 *
 * Runs a fixed number of iterations, prints the wall-clock time of each one
 * and finishes with the average, minimum and maximum. If no local module
 * named 'KJV' is found, the names of the local modules are printed and the
 * process exit code is set to 1 so that calling scripts can detect that no
 * measurement took place.
 *
 * @returns {Promise<void>} Settles once all results have been printed.
 */
async function runBenchmark() {
  const nsi = new NodeSwordInterface();

  // Check if KJV is installed
  const localModules = nsi.getAllLocalModules();
  const kjvInstalled = localModules.some(m => m.name === 'KJV');

  if (!kjvInstalled) {
    console.log('KJV module is not installed. Please install it first.');
    console.log('Available modules:', localModules.map(m => m.name).join(', '));
    // Nothing was measured: report failure through the exit status instead of
    // finishing with status 0 as if the benchmark had succeeded.
    process.exitCode = 1;
    return;
  }

  nsi.enableMarkup();

  const iterations = 3;
  const times = [];
  let verseCount = 0;

  console.log(`Running ${iterations} iterations of getBibleText("KJV") with markup enabled...`);
  console.log('Use --prof flag to generate V8 profiler output\n');

  for (let i = 0; i < iterations; i++) {
    // hrtime.bigint() yields nanoseconds; only the getBibleText call is timed.
    const start = process.hrtime.bigint();
    const verses = nsi.getBibleText('KJV');
    const end = process.hrtime.bigint();

    const durationMs = Number(end - start) / 1_000_000;
    times.push(durationMs);
    verseCount = verses.length;

    console.log(`  Iteration ${i + 1}: ${durationMs.toFixed(2)} ms`);
  }

  const avg = times.reduce((a, b) => a + b, 0) / times.length;
  const min = Math.min(...times);
  const max = Math.max(...times);

  console.log('\nBenchmark complete!');
  console.log(`  Verses processed: ${verseCount}`);
  console.log(`  Average time: ${avg.toFixed(2)} ms`);
  console.log(`  Min: ${min.toFixed(2)} ms, Max: ${max.toFixed(2)} ms`);
}

runBenchmark().catch((error) => {
  console.error(error);
  // A handled rejection would otherwise leave the exit status at 0.
  process.exitCode = 1;
});
|
|
@@ -74,7 +74,6 @@ string TextProcessor::getFileUrl(const string& nativePath)
|
|
|
74
74
|
string TextProcessor::getFilteredText(const string& text, int chapter, int verseNr, bool hasStrongs, bool hasInconsistentClosingEndDivs, const string& moduleFileUrl)
|
|
75
75
|
{
|
|
76
76
|
static string chapterFilter = "<chapter";
|
|
77
|
-
static regex pbElement = regex("<pb .*?/> ");
|
|
78
77
|
|
|
79
78
|
static string lbBeginParagraph = "<lb type=\"x-begin-paragraph\"/>";
|
|
80
79
|
static string lbEndParagraph = "<lb type=\"x-end-paragraph\"/>";
|
|
@@ -117,11 +116,6 @@ string TextProcessor::getFilteredText(const string& text, int chapter, int verse
|
|
|
117
116
|
static string hiItalic = "<hi type=\"italic";
|
|
118
117
|
static string hiSuper = "<hi type=\"super";
|
|
119
118
|
|
|
120
|
-
static regex milestoneFilter = regex("<milestone.*?/>");
|
|
121
|
-
static regex segStartElementFilter = regex("<seg.*?>");
|
|
122
|
-
static regex divSectionElementFilter = regex("<div type=\"section\".*?>");
|
|
123
|
-
static regex selfClosingElement = regex("(<)([wdiv]{1,3}) ([\\w:=\"\\- ]*?)(/>)");
|
|
124
|
-
|
|
125
119
|
static string fullStopWithoutSpace = ".<";
|
|
126
120
|
static string questionMarkWithoutSpace = "?<";
|
|
127
121
|
static string exclamationMarkWithoutSpace = "!<";
|
|
@@ -130,9 +124,9 @@ string TextProcessor::getFilteredText(const string& text, int chapter, int verse
|
|
|
130
124
|
static string colonWithoutSpace = ":<";
|
|
131
125
|
|
|
132
126
|
string filteredText = text;
|
|
133
|
-
|
|
127
|
+
|
|
134
128
|
// Remove the first pbElement, because it prevents correctly replacing the first note in the next step
|
|
135
|
-
|
|
129
|
+
this->removePbElementsWithSpace(filteredText);
|
|
136
130
|
|
|
137
131
|
// Remove <note type="variant"> if it occurs in the beginning of the verse (applicable for NA28), because it has
|
|
138
132
|
// been observed that the note is not properly closed.
|
|
@@ -160,9 +154,11 @@ string TextProcessor::getFilteredText(const string& text, int chapter, int verse
|
|
|
160
154
|
this->findAndReplaceAll(filteredText, rtxtStartElementFilter2, "<div class=\"sword-markup sword-rtxt\" rend=");
|
|
161
155
|
this->findAndReplaceAll(filteredText, rtxtEndElementFilter, "</div>");
|
|
162
156
|
this->findAndReplaceAll(filteredText, pbElementFilter, "<pb class=\"sword-markup sword-pb\"");
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
157
|
+
|
|
158
|
+
this->replaceMilestoneLineElements(filteredText);
|
|
159
|
+
this->removeMilestoneElements(filteredText);
|
|
160
|
+
this->removeSegStartElements(filteredText);
|
|
161
|
+
this->removeDivSectionElements(filteredText);
|
|
166
162
|
|
|
167
163
|
stringstream sectionTitleElement;
|
|
168
164
|
sectionTitleElement << "<div class=\"sword-markup sword-section-title\" ";
|
|
@@ -204,7 +200,7 @@ string TextProcessor::getFilteredText(const string& text, int chapter, int verse
|
|
|
204
200
|
this->findAndReplaceAll(filteredText, hiItalic, "<hi class=\"italic");
|
|
205
201
|
this->findAndReplaceAll(filteredText, hiSuper, "<hi class=\"super");
|
|
206
202
|
|
|
207
|
-
|
|
203
|
+
this->expandSelfClosingElements(filteredText);
|
|
208
204
|
|
|
209
205
|
this->findAndReplaceAll(filteredText, fullStopWithoutSpace, ". <");
|
|
210
206
|
this->findAndReplaceAll(filteredText, questionMarkWithoutSpace, "? <");
|
|
@@ -316,6 +312,7 @@ string TextProcessor::getCurrentVerseText(sword::SWModule* module, bool hasStron
|
|
|
316
312
|
VerseKey currentVerseKey = module->getKey();
|
|
317
313
|
int currentChapter = currentVerseKey.getChapter();
|
|
318
314
|
int currentVerseNr = currentVerseKey.getVerse();
|
|
315
|
+
|
|
319
316
|
verseText = string(module->getRawEntry());
|
|
320
317
|
StringHelper::trim(verseText);
|
|
321
318
|
filteredText = verseText;
|
|
@@ -473,7 +470,7 @@ vector<Verse> TextProcessor::getText(string moduleName, string key, QueryLimit q
|
|
|
473
470
|
} else {
|
|
474
471
|
startVerseNumber = 1;
|
|
475
472
|
}
|
|
476
|
-
|
|
473
|
+
|
|
477
474
|
for (;;) {
|
|
478
475
|
VerseKey currentVerseKey(module->getKey());
|
|
479
476
|
string currentBookName(currentVerseKey.getBookAbbrev());
|
|
@@ -505,7 +502,7 @@ vector<Verse> TextProcessor::getText(string moduleName, string key, QueryLimit q
|
|
|
505
502
|
string chapterHeading = this->getCurrentChapterHeading(module, moduleFileUrl);
|
|
506
503
|
verseText += chapterHeading;
|
|
507
504
|
}
|
|
508
|
-
|
|
505
|
+
|
|
509
506
|
// Current verse text
|
|
510
507
|
verseText += this->getCurrentVerseText(module,
|
|
511
508
|
hasStrongs,
|
|
@@ -530,7 +527,9 @@ vector<Verse> TextProcessor::getText(string moduleName, string key, QueryLimit q
|
|
|
530
527
|
lastKey = currentKey;
|
|
531
528
|
lastBookName = currentBookName;
|
|
532
529
|
lastChapter = currentChapter;
|
|
530
|
+
|
|
533
531
|
module->increment();
|
|
532
|
+
|
|
534
533
|
index++;
|
|
535
534
|
}
|
|
536
535
|
}
|
|
@@ -727,3 +726,132 @@ unsigned int TextProcessor::findAndReplaceAll(std::string & data, std::string to
|
|
|
727
726
|
|
|
728
727
|
return count;
|
|
729
728
|
}
|
|
729
|
+
|
|
730
|
+
// Remove elements matching pattern: <prefix ... suffix>
|
|
731
|
+
// This is a string-based replacement for regex patterns like "<prefix.*?suffix>"
|
|
732
|
+
void TextProcessor::removeElementsByPrefixSuffix(std::string& data, const std::string& prefix, const std::string& suffix)
|
|
733
|
+
{
|
|
734
|
+
size_t pos = 0;
|
|
735
|
+
while ((pos = data.find(prefix, pos)) != std::string::npos) {
|
|
736
|
+
size_t endPos = data.find(suffix, pos + prefix.size());
|
|
737
|
+
if (endPos != std::string::npos) {
|
|
738
|
+
data.erase(pos, endPos + suffix.size() - pos);
|
|
739
|
+
} else {
|
|
740
|
+
break;
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
// Remove milestone elements with type="line" and replace with <br/>
|
|
746
|
+
void TextProcessor::replaceMilestoneLineElements(std::string& data)
|
|
747
|
+
{
|
|
748
|
+
static const std::string milestoneStart = "<milestone";
|
|
749
|
+
static const std::string typeLine = "type=\"line\"";
|
|
750
|
+
static const std::string milestoneEnd = "/>";
|
|
751
|
+
|
|
752
|
+
size_t pos = 0;
|
|
753
|
+
while ((pos = data.find(milestoneStart, pos)) != std::string::npos) {
|
|
754
|
+
size_t endPos = data.find(milestoneEnd, pos);
|
|
755
|
+
if (endPos != std::string::npos) {
|
|
756
|
+
size_t elementEnd = endPos + milestoneEnd.size();
|
|
757
|
+
std::string element = data.substr(pos, elementEnd - pos);
|
|
758
|
+
|
|
759
|
+
if (element.find(typeLine) != std::string::npos) {
|
|
760
|
+
data.replace(pos, elementEnd - pos, "<br/>");
|
|
761
|
+
pos += 5; // length of "<br/>"
|
|
762
|
+
} else {
|
|
763
|
+
pos = elementEnd;
|
|
764
|
+
}
|
|
765
|
+
} else {
|
|
766
|
+
break;
|
|
767
|
+
}
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
// Remove all milestone elements (those not already replaced)
|
|
772
|
+
void TextProcessor::removeMilestoneElements(std::string& data)
|
|
773
|
+
{
|
|
774
|
+
removeElementsByPrefixSuffix(data, "<milestone", "/>");
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
// Remove seg start elements: <seg...>
|
|
778
|
+
void TextProcessor::removeSegStartElements(std::string& data)
|
|
779
|
+
{
|
|
780
|
+
removeElementsByPrefixSuffix(data, "<seg", ">");
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
// Remove div section elements: <div type="section"...>
|
|
784
|
+
void TextProcessor::removeDivSectionElements(std::string& data)
|
|
785
|
+
{
|
|
786
|
+
static const std::string divSection = "<div type=\"section\"";
|
|
787
|
+
|
|
788
|
+
size_t pos = 0;
|
|
789
|
+
while ((pos = data.find(divSection, pos)) != std::string::npos) {
|
|
790
|
+
size_t endPos = data.find(">", pos + divSection.size());
|
|
791
|
+
if (endPos != std::string::npos) {
|
|
792
|
+
data.erase(pos, endPos + 1 - pos);
|
|
793
|
+
} else {
|
|
794
|
+
break;
|
|
795
|
+
}
|
|
796
|
+
}
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
// Expand self-closing elements: <w .../> -> <w ...></w> and <div .../> -> <div ...></div>
|
|
800
|
+
void TextProcessor::expandSelfClosingElements(std::string& data)
|
|
801
|
+
{
|
|
802
|
+
size_t pos = 0;
|
|
803
|
+
while (pos < data.size()) {
|
|
804
|
+
// Find next '<'
|
|
805
|
+
size_t startPos = data.find('<', pos);
|
|
806
|
+
if (startPos == std::string::npos) break;
|
|
807
|
+
|
|
808
|
+
// Check if it's <w or <div followed by space
|
|
809
|
+
bool isW = (data.compare(startPos, 3, "<w ") == 0);
|
|
810
|
+
bool isDiv = (data.compare(startPos, 5, "<div ") == 0);
|
|
811
|
+
|
|
812
|
+
if (isW || isDiv) {
|
|
813
|
+
// Find the end of the tag
|
|
814
|
+
size_t endPos = data.find('>', startPos);
|
|
815
|
+
if (endPos != std::string::npos && endPos > startPos + 1) {
|
|
816
|
+
// Check if it's self-closing (ends with />)
|
|
817
|
+
if (data[endPos - 1] == '/') {
|
|
818
|
+
// It's self-closing, expand it
|
|
819
|
+
std::string tagName = isW ? "w" : "div";
|
|
820
|
+
std::string closingTag = "</" + tagName + ">";
|
|
821
|
+
|
|
822
|
+
// Remove the / before >
|
|
823
|
+
data.erase(endPos - 1, 1);
|
|
824
|
+
endPos--; // Adjust for removed character
|
|
825
|
+
|
|
826
|
+
// Insert closing tag after >
|
|
827
|
+
data.insert(endPos + 1, closingTag);
|
|
828
|
+
|
|
829
|
+
pos = endPos + 1 + closingTag.size();
|
|
830
|
+
} else {
|
|
831
|
+
pos = endPos + 1;
|
|
832
|
+
}
|
|
833
|
+
} else {
|
|
834
|
+
pos = startPos + 1;
|
|
835
|
+
}
|
|
836
|
+
} else {
|
|
837
|
+
pos = startPos + 1;
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
// Remove pb elements with trailing space: "<pb .../> "
|
|
843
|
+
void TextProcessor::removePbElementsWithSpace(std::string& data)
|
|
844
|
+
{
|
|
845
|
+
static const std::string pbStart = "<pb ";
|
|
846
|
+
static const std::string pbEnd = "/> ";
|
|
847
|
+
|
|
848
|
+
size_t pos = 0;
|
|
849
|
+
while ((pos = data.find(pbStart, pos)) != std::string::npos) {
|
|
850
|
+
size_t endPos = data.find(pbEnd, pos);
|
|
851
|
+
if (endPos != std::string::npos) {
|
|
852
|
+
data.erase(pos, endPos + pbEnd.size() - pos);
|
|
853
|
+
} else {
|
|
854
|
+
break;
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
}
|
|
@@ -70,6 +70,15 @@ private:
|
|
|
70
70
|
std::string replaceSpacesInStrongs(const std::string& text);
|
|
71
71
|
unsigned int findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr);
|
|
72
72
|
|
|
73
|
+
// String-based replacements for regex operations (performance optimization)
|
|
74
|
+
// Erases from data every span that starts with prefix and ends with the first
// suffix found after it; stops at the first prefix that has no suffix behind it.
void removeElementsByPrefixSuffix(std::string& data, const std::string& prefix, const std::string& suffix);
|
|
75
|
+
// Replaces each "<milestone ... />" span that contains type="line" with <br/>;
// other milestone elements are left in data.
void replaceMilestoneLineElements(std::string& data);
|
|
76
|
+
// Erases every span from "<milestone" up to and including the next "/>".
void removeMilestoneElements(std::string& data);
|
|
77
|
+
// Erases every span from "<seg" up to and including the next ">".
void removeSegStartElements(std::string& data);
|
|
78
|
+
// Erases every span from <div type="section" up to and including the next ">".
void removeDivSectionElements(std::string& data);
|
|
79
|
+
// Rewrites self-closing "<w .../>" and "<div .../>" tags as an opening tag
// followed directly by the matching closing tag.
void expandSelfClosingElements(std::string& data);
|
|
80
|
+
// Erases every span from "<pb " up to and including the next "/> "
// (the trailing space is part of the removed text).
void removePbElementsWithSpace(std::string& data);
|
|
81
|
+
|
|
73
82
|
std::string getBookFromReference(std::string reference);
|
|
74
83
|
std::vector<std::string> getBookListFromReferences(std::vector<std::string>& references);
|
|
75
84
|
|