@helloao/cli 0.0.19 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/cli.cjs CHANGED
@@ -12333,7 +12333,10 @@ var verseSchema = external_exports.object({
12333
12333
  id: external_exports.string(),
12334
12334
  bookCode: external_exports.string().optional(),
12335
12335
  chapter: external_exports.number().optional(),
12336
- verse: external_exports.number().optional()
12336
+ verse: external_exports.number().optional(),
12337
+ data: external_exports.object({
12338
+ globalReferences: external_exports.array(external_exports.string()).optional()
12339
+ }).optional()
12337
12340
  });
12338
12341
  var paratextSchema = external_exports.object({
12339
12342
  type: external_exports.literal("paratext"),
@@ -12363,6 +12366,10 @@ var CodexParser = class {
12363
12366
  // */
12364
12367
  // preserveMarkdown: boolean = true;
12365
12368
  _noteCounter = 0;
12369
+ _parser;
12370
+ constructor(parser) {
12371
+ this._parser = parser;
12372
+ }
12366
12373
  /**
12367
12374
  * Parses the specified codex content.
12368
12375
  *
@@ -12378,7 +12385,7 @@ var CodexParser = class {
12378
12385
  };
12379
12386
  let chapters = /* @__PURE__ */ new Map();
12380
12387
  let lastChapter = null;
12381
- function addChapterContent(chapterNumber, content) {
12388
+ function ensureChapter(chapterNumber) {
12382
12389
  let chapter = chapters.get(chapterNumber);
12383
12390
  if (!chapter) {
12384
12391
  chapter = {
@@ -12390,8 +12397,23 @@ var CodexParser = class {
12390
12397
  lastChapter = chapter;
12391
12398
  chapters.set(chapterNumber, chapter);
12392
12399
  }
12400
+ return chapter;
12401
+ }
12402
+ function getDocumentContent(doc) {
12403
+ let content = "";
12404
+ for (let element of doc.childNodes) {
12405
+ content += element.textContent;
12406
+ }
12407
+ return content;
12408
+ }
12409
+ function addChapterContent(chapterNumber, content) {
12410
+ const chapter = ensureChapter(chapterNumber);
12393
12411
  chapter.content.push(...content);
12394
12412
  }
12413
+ function addChapterFootnote(chapterNumber, footnote) {
12414
+ const chapter = ensureChapter(chapterNumber);
12415
+ chapter.footnotes.push(footnote);
12416
+ }
12395
12417
  function addReference(ref2, content) {
12396
12418
  addChapterContent(ref2.chapter, [
12397
12419
  {
@@ -12413,8 +12435,12 @@ var CodexParser = class {
12413
12435
  function addLineBreaks(lines2) {
12414
12436
  return lines2.reduce(
12415
12437
  (prev, current) => {
12416
- if (prev.length === 0) return [current];
12417
- else
12438
+ if (typeof current === "string" && current.trim() === "") {
12439
+ return prev;
12440
+ }
12441
+ if (prev.length === 0) {
12442
+ return [current];
12443
+ } else {
12418
12444
  return [
12419
12445
  ...prev,
12420
12446
  {
@@ -12422,6 +12448,7 @@ var CodexParser = class {
12422
12448
  },
12423
12449
  current
12424
12450
  ];
12451
+ }
12425
12452
  },
12426
12453
  []
12427
12454
  );
@@ -12466,6 +12493,15 @@ var CodexParser = class {
12466
12493
  "Could not find verse reference in metadata."
12467
12494
  );
12468
12495
  }
12496
+ } else if (metadata.data?.globalReferences?.length) {
12497
+ reference = parseVerseReference(
12498
+ metadata.data.globalReferences[0]
12499
+ );
12500
+ if (!reference) {
12501
+ throw new Error(
12502
+ "Could not find verse reference in globalReferences metadata."
12503
+ );
12504
+ }
12469
12505
  } else {
12470
12506
  throw new Error(
12471
12507
  "Could not find verse reference in metadata."
@@ -12478,13 +12514,49 @@ var CodexParser = class {
12478
12514
  }
12479
12515
  previousReference = reference;
12480
12516
  if (!root.id) root.id = reference.book;
12481
- const content = stripHTML(cell.value);
12482
- const newLines = content.split("\n");
12483
- lines.push(...newLines);
12517
+ const doc = this._parser.parseFromString(
12518
+ cell.value,
12519
+ "text/html"
12520
+ );
12521
+ for (let element of doc.children) {
12522
+ const allFootnotes = element.querySelectorAll(
12523
+ "sup.footnote-marker[data-footnote]"
12524
+ );
12525
+ for (const footnote of allFootnotes) {
12526
+ const footnoteData = footnote.getAttribute("data-footnote");
12527
+ if (footnoteData) {
12528
+ const footnoteDoc = this._parser.parseFromString(
12529
+ footnoteData,
12530
+ "text/html"
12531
+ );
12532
+ const footnoteText = getDocumentContent(footnoteDoc);
12533
+ if (footnoteText) {
12534
+ addChapterFootnote(reference.chapter, {
12535
+ noteId: this._noteCounter++,
12536
+ text: footnoteText,
12537
+ caller: null
12538
+ });
12539
+ }
12540
+ }
12541
+ footnote.remove();
12542
+ }
12543
+ }
12544
+ const content = getDocumentContent(doc);
12545
+ if (content) {
12546
+ const newLines = content.split("\n");
12547
+ lines.push(...newLines);
12548
+ }
12484
12549
  } else if (metadata.type === "paratext") {
12485
12550
  const reference = parseVerseReference(`${metadata.id}:1`);
12486
12551
  if (reference) {
12487
- const content = stripHTML(cell.value);
12552
+ const doc = this._parser.parseFromString(
12553
+ cell.value,
12554
+ "text/html"
12555
+ );
12556
+ const content = getDocumentContent(doc);
12557
+ if (!content) {
12558
+ continue;
12559
+ }
12488
12560
  const lines2 = content.split("\n").reduce((prev, current) => {
12489
12561
  if (prev.length === 0)
12490
12562
  return [toHeading(current)];
@@ -12501,8 +12573,12 @@ var CodexParser = class {
12501
12573
  (line) => addChapterContent(reference.chapter, [line])
12502
12574
  );
12503
12575
  } else {
12504
- const content = stripHTML(cell.value);
12505
- if (!cell.metadata) {
12576
+ const doc = this._parser.parseFromString(
12577
+ cell.value,
12578
+ "text/html"
12579
+ );
12580
+ const content = getDocumentContent(doc);
12581
+ if (content && !cell.metadata) {
12506
12582
  if (lastChapter) {
12507
12583
  lastChapter.footnotes.push({
12508
12584
  noteId: this._noteCounter++,
@@ -12816,7 +12892,7 @@ function generateDataset(files, parser = new globalThis.DOMParser(), bookMap) {
12816
12892
  };
12817
12893
  let usfmParser = new UsfmParser();
12818
12894
  let usxParser = new USXParser(parser);
12819
- let codexParser = new CodexParser();
12895
+ let codexParser = new CodexParser(parser);
12820
12896
  let csvCommentaryParser = new CommentaryCsvParser();
12821
12897
  let tyndaleXmlParser = new TyndaleXmlParser(parser);
12822
12898
  let parsedTranslations = /* @__PURE__ */ new Map();
@@ -11781,7 +11781,10 @@ var verseSchema = external_exports.object({
11781
11781
  id: external_exports.string(),
11782
11782
  bookCode: external_exports.string().optional(),
11783
11783
  chapter: external_exports.number().optional(),
11784
- verse: external_exports.number().optional()
11784
+ verse: external_exports.number().optional(),
11785
+ data: external_exports.object({
11786
+ globalReferences: external_exports.array(external_exports.string()).optional()
11787
+ }).optional()
11785
11788
  });
11786
11789
  var paratextSchema = external_exports.object({
11787
11790
  type: external_exports.literal("paratext"),
@@ -11811,6 +11814,10 @@ var CodexParser = class {
11811
11814
  // */
11812
11815
  // preserveMarkdown: boolean = true;
11813
11816
  _noteCounter = 0;
11817
+ _parser;
11818
+ constructor(parser) {
11819
+ this._parser = parser;
11820
+ }
11814
11821
  /**
11815
11822
  * Parses the specified codex content.
11816
11823
  *
@@ -11826,7 +11833,7 @@ var CodexParser = class {
11826
11833
  };
11827
11834
  let chapters = /* @__PURE__ */ new Map();
11828
11835
  let lastChapter = null;
11829
- function addChapterContent(chapterNumber, content) {
11836
+ function ensureChapter(chapterNumber) {
11830
11837
  let chapter = chapters.get(chapterNumber);
11831
11838
  if (!chapter) {
11832
11839
  chapter = {
@@ -11838,8 +11845,23 @@ var CodexParser = class {
11838
11845
  lastChapter = chapter;
11839
11846
  chapters.set(chapterNumber, chapter);
11840
11847
  }
11848
+ return chapter;
11849
+ }
11850
+ function getDocumentContent(doc) {
11851
+ let content = "";
11852
+ for (let element of doc.childNodes) {
11853
+ content += element.textContent;
11854
+ }
11855
+ return content;
11856
+ }
11857
+ function addChapterContent(chapterNumber, content) {
11858
+ const chapter = ensureChapter(chapterNumber);
11841
11859
  chapter.content.push(...content);
11842
11860
  }
11861
+ function addChapterFootnote(chapterNumber, footnote) {
11862
+ const chapter = ensureChapter(chapterNumber);
11863
+ chapter.footnotes.push(footnote);
11864
+ }
11843
11865
  function addReference(ref2, content) {
11844
11866
  addChapterContent(ref2.chapter, [
11845
11867
  {
@@ -11861,8 +11883,12 @@ var CodexParser = class {
11861
11883
  function addLineBreaks(lines2) {
11862
11884
  return lines2.reduce(
11863
11885
  (prev, current) => {
11864
- if (prev.length === 0) return [current];
11865
- else
11886
+ if (typeof current === "string" && current.trim() === "") {
11887
+ return prev;
11888
+ }
11889
+ if (prev.length === 0) {
11890
+ return [current];
11891
+ } else {
11866
11892
  return [
11867
11893
  ...prev,
11868
11894
  {
@@ -11870,6 +11896,7 @@ var CodexParser = class {
11870
11896
  },
11871
11897
  current
11872
11898
  ];
11899
+ }
11873
11900
  },
11874
11901
  []
11875
11902
  );
@@ -11914,6 +11941,15 @@ var CodexParser = class {
11914
11941
  "Could not find verse reference in metadata."
11915
11942
  );
11916
11943
  }
11944
+ } else if (metadata.data?.globalReferences?.length) {
11945
+ reference = parseVerseReference(
11946
+ metadata.data.globalReferences[0]
11947
+ );
11948
+ if (!reference) {
11949
+ throw new Error(
11950
+ "Could not find verse reference in globalReferences metadata."
11951
+ );
11952
+ }
11917
11953
  } else {
11918
11954
  throw new Error(
11919
11955
  "Could not find verse reference in metadata."
@@ -11926,13 +11962,49 @@ var CodexParser = class {
11926
11962
  }
11927
11963
  previousReference = reference;
11928
11964
  if (!root.id) root.id = reference.book;
11929
- const content = stripHTML(cell.value);
11930
- const newLines = content.split("\n");
11931
- lines.push(...newLines);
11965
+ const doc = this._parser.parseFromString(
11966
+ cell.value,
11967
+ "text/html"
11968
+ );
11969
+ for (let element of doc.children) {
11970
+ const allFootnotes = element.querySelectorAll(
11971
+ "sup.footnote-marker[data-footnote]"
11972
+ );
11973
+ for (const footnote of allFootnotes) {
11974
+ const footnoteData = footnote.getAttribute("data-footnote");
11975
+ if (footnoteData) {
11976
+ const footnoteDoc = this._parser.parseFromString(
11977
+ footnoteData,
11978
+ "text/html"
11979
+ );
11980
+ const footnoteText = getDocumentContent(footnoteDoc);
11981
+ if (footnoteText) {
11982
+ addChapterFootnote(reference.chapter, {
11983
+ noteId: this._noteCounter++,
11984
+ text: footnoteText,
11985
+ caller: null
11986
+ });
11987
+ }
11988
+ }
11989
+ footnote.remove();
11990
+ }
11991
+ }
11992
+ const content = getDocumentContent(doc);
11993
+ if (content) {
11994
+ const newLines = content.split("\n");
11995
+ lines.push(...newLines);
11996
+ }
11932
11997
  } else if (metadata.type === "paratext") {
11933
11998
  const reference = parseVerseReference(`${metadata.id}:1`);
11934
11999
  if (reference) {
11935
- const content = stripHTML(cell.value);
12000
+ const doc = this._parser.parseFromString(
12001
+ cell.value,
12002
+ "text/html"
12003
+ );
12004
+ const content = getDocumentContent(doc);
12005
+ if (!content) {
12006
+ continue;
12007
+ }
11936
12008
  const lines2 = content.split("\n").reduce((prev, current) => {
11937
12009
  if (prev.length === 0)
11938
12010
  return [toHeading(current)];
@@ -11949,8 +12021,12 @@ var CodexParser = class {
11949
12021
  (line) => addChapterContent(reference.chapter, [line])
11950
12022
  );
11951
12023
  } else {
11952
- const content = stripHTML(cell.value);
11953
- if (!cell.metadata) {
12024
+ const doc = this._parser.parseFromString(
12025
+ cell.value,
12026
+ "text/html"
12027
+ );
12028
+ const content = getDocumentContent(doc);
12029
+ if (content && !cell.metadata) {
11954
12030
  if (lastChapter) {
11955
12031
  lastChapter.footnotes.push({
11956
12032
  noteId: this._noteCounter++,
@@ -12264,7 +12340,7 @@ function generateDataset(files, parser = new globalThis.DOMParser(), bookMap) {
12264
12340
  };
12265
12341
  let usfmParser = new UsfmParser();
12266
12342
  let usxParser = new USXParser(parser);
12267
- let codexParser = new CodexParser();
12343
+ let codexParser = new CodexParser(parser);
12268
12344
  let csvCommentaryParser = new CommentaryCsvParser();
12269
12345
  let tyndaleXmlParser = new TyndaleXmlParser(parser);
12270
12346
  let parsedTranslations = /* @__PURE__ */ new Map();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@helloao/cli",
3
- "version": "0.0.19",
3
+ "version": "0.1.0",
4
4
  "description": "A CLI and related tools for managing HelloAO's Free Bible API",
5
5
  "main": "./dist/cjs/index.cjs",
6
6
  "module": "./dist/esm/index.js",
@@ -43,7 +43,7 @@
43
43
  "all-iso-language-codes": "1.0.17",
44
44
  "papaparse": "5.4.1",
45
45
  "luxon": "3.5.0",
46
- "@helloao/tools": "0.0.19"
46
+ "@helloao/tools": "0.1.0"
47
47
  },
48
48
  "files": [
49
49
  "/README.md",