@gmb/bitmark-parser-generator 5.15.0 → 5.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -10920,7 +10920,7 @@ var instance2 = new Config();
10920
10920
  // src/generated/package_info.ts
10921
10921
  var PACKAGE_INFO = {
10922
10922
  "name": "@gmb/bitmark-parser-generator",
10923
- "version": "5.15.0",
10923
+ "version": "5.16.0",
10924
10924
  "author": "Get More Brain Ltd",
10925
10925
  "license": "ISC",
10926
10926
  "description": "A bitmark parser and generator using Peggy.js"
@@ -39743,6 +39743,265 @@ var JsonFileGenerator = class {
39743
39743
  }
39744
39744
  };
39745
39745
 
39746
+ // src/generator/plainText/PlainTextGenerator.ts
39747
+ var TEXT_NODE_TYPES = new Set(Object.values(TextNodeType));
39748
+ var PlainTextGenerator = class {
39749
+ /**
39750
+ * Generate plain text from a string or JSON object.
39751
+ *
39752
+ * @param input - A string (plain or JSON-encoded) or a parsed JSON value.
39753
+ * @returns The extracted plain text.
39754
+ */
39755
+ generate(input) {
39756
+ let data = input;
39757
+ if (typeof data === "string") {
39758
+ try {
39759
+ data = JSON.parse(data);
39760
+ } catch (_e) {
39761
+ return data;
39762
+ }
39763
+ }
39764
+ return this.walk(data).trim();
39765
+ }
39766
+ // ---------------------------------------------------------------------------
39767
+ // Private helpers
39768
+ // ---------------------------------------------------------------------------
39769
+ walk(value) {
39770
+ if (value == null) return "";
39771
+ if (typeof value === "string") return value;
39772
+ if (typeof value !== "object") return "";
39773
+ if (Array.isArray(value)) {
39774
+ if (value.length === 0) return "";
39775
+ if (this.isTextAst(value)) {
39776
+ return this.textAstToPlainText(value);
39777
+ }
39778
+ return value.map((v) => this.walk(v)).filter(Boolean).join("\n");
39779
+ }
39780
+ const obj = value;
39781
+ if (this.isTextNode(obj)) {
39782
+ return this.textNodeToPlainText(obj);
39783
+ }
39784
+ if (this.isBitWrapper(obj)) {
39785
+ return this.walk(obj["bit"]);
39786
+ }
39787
+ const parts = [];
39788
+ for (const val of Object.values(obj)) {
39789
+ if (val == null || typeof val !== "object") continue;
39790
+ const text = this.walk(val);
39791
+ if (text) parts.push(text);
39792
+ }
39793
+ return parts.join("\n");
39794
+ }
39795
+ // -- Type guards -----------------------------------------------------------
39796
+ isTextNode(obj) {
39797
+ return typeof obj["type"] === "string" && TEXT_NODE_TYPES.has(obj["type"]);
39798
+ }
39799
+ isTextAst(arr) {
39800
+ const first = arr[0];
39801
+ return typeof first === "object" && first !== null && !Array.isArray(first) && this.isTextNode(first);
39802
+ }
39803
+ isBitWrapper(obj) {
39804
+ return "bit" in obj && typeof obj["bit"] === "object" && obj["bit"] !== null && !Array.isArray(obj["bit"]);
39805
+ }
39806
+ // -- TextNode extraction ---------------------------------------------------
39807
+ textAstToPlainText(ast) {
39808
+ return ast.map((node) => this.textNodeToPlainText(node)).join("\n");
39809
+ }
39810
+ textNodeToPlainText(node) {
39811
+ const { type, text, content } = node;
39812
+ switch (type) {
39813
+ case TextNodeType.text:
39814
+ return this.textWithMarks(node);
39815
+ case TextNodeType.hardBreak:
39816
+ return "\n";
39817
+ // Block elements whose children are joined without extra separator
39818
+ case TextNodeType.paragraph:
39819
+ case TextNodeType.heading:
39820
+ case TextNodeType.section:
39821
+ case TextNodeType.gap:
39822
+ case TextNodeType.select:
39823
+ case TextNodeType.highlight:
39824
+ case TextNodeType.mark:
39825
+ case TextNodeType.codeBlock:
39826
+ return content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
39827
+ // List items are handled by listToPlainText with indent context
39828
+ case TextNodeType.listItem:
39829
+ return content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
39830
+ // Task item – handled by taskListToPlainText, but fallback if encountered standalone
39831
+ case TextNodeType.taskItem: {
39832
+ const checked = node.attrs?.checked ?? false;
39833
+ const prefix = checked ? "[x] " : "[ ] ";
39834
+ const itemText = content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
39835
+ return `${prefix}${itemText}`;
39836
+ }
39837
+ // List containers – rendered with indent-aware helper
39838
+ case TextNodeType.noBulletList:
39839
+ case TextNodeType.bulletList:
39840
+ case TextNodeType.orderedList:
39841
+ case TextNodeType.orderedListRoman:
39842
+ case TextNodeType.orderedListRomanLower:
39843
+ case TextNodeType.letteredList:
39844
+ case TextNodeType.letteredListLower:
39845
+ return this.listToPlainText(node, 0);
39846
+ // Task list – rendered with indent-aware helper
39847
+ case TextNodeType.taskList:
39848
+ return this.taskListToPlainText(node, 0);
39849
+ // Images – return alt text when available
39850
+ case TextNodeType.image:
39851
+ case TextNodeType.imageInline: {
39852
+ const attrs = node.attrs;
39853
+ return attrs?.alt ?? "";
39854
+ }
39855
+ // LaTeX – return the formula source
39856
+ case TextNodeType.latex: {
39857
+ const latexAttrs = node.attrs;
39858
+ return latexAttrs?.formula ?? "";
39859
+ }
39860
+ default:
39861
+ return content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
39862
+ }
39863
+ }
39864
+ listToPlainText(node, depth) {
39865
+ const { type, content } = node;
39866
+ if (!content || content.length === 0) return "";
39867
+ const indent = " ".repeat(depth);
39868
+ const start = node.attrs?.start ?? 1;
39869
+ const displayStart = start < 1 ? start + 1 : start;
39870
+ return content.map((child, i) => {
39871
+ const { inline, nested } = this.splitListItemContent(child, depth);
39872
+ const prefix = this.listItemPrefix(type, displayStart + i);
39873
+ const line = `${indent}${prefix}${inline}`;
39874
+ return nested ? `${line}
39875
+ ${nested}` : line;
39876
+ }).join("\n");
39877
+ }
39878
+ taskListToPlainText(node, depth) {
39879
+ const { content } = node;
39880
+ if (!content || content.length === 0) return "";
39881
+ const indent = " ".repeat(depth);
39882
+ return content.map((child) => {
39883
+ const checked = child.attrs?.checked ?? false;
39884
+ const prefix = checked ? "[x] " : "[ ] ";
39885
+ const { inline, nested } = this.splitListItemContent(child, depth);
39886
+ const line = `${indent}${prefix}${inline}`;
39887
+ return nested ? `${line}
39888
+ ${nested}` : line;
39889
+ }).join("\n");
39890
+ }
39891
+ splitListItemContent(item, depth) {
39892
+ const children = item.content ?? [];
39893
+ const inlineParts = [];
39894
+ const nestedParts = [];
39895
+ for (const child of children) {
39896
+ if (this.isListType(child.type)) {
39897
+ nestedParts.push(this.renderNestedList(child, depth + 1));
39898
+ } else {
39899
+ inlineParts.push(this.textNodeToPlainText(child));
39900
+ }
39901
+ }
39902
+ return {
39903
+ inline: inlineParts.join(""),
39904
+ nested: nestedParts.join("\n")
39905
+ };
39906
+ }
39907
+ isListType(type) {
39908
+ return type === TextNodeType.bulletList || type === TextNodeType.orderedList || type === TextNodeType.orderedListRoman || type === TextNodeType.orderedListRomanLower || type === TextNodeType.letteredList || type === TextNodeType.letteredListLower || type === TextNodeType.noBulletList || type === TextNodeType.taskList;
39909
+ }
39910
+ renderNestedList(node, depth) {
39911
+ if (node.type === TextNodeType.taskList) {
39912
+ return this.taskListToPlainText(node, depth);
39913
+ }
39914
+ return this.listToPlainText(node, depth);
39915
+ }
39916
+ listItemPrefix(listType, index) {
39917
+ switch (listType) {
39918
+ case TextNodeType.bulletList:
39919
+ return "\u2022 ";
39920
+ case TextNodeType.orderedList:
39921
+ return `${index}. `;
39922
+ case TextNodeType.orderedListRoman:
39923
+ return `${this.toRoman(index)}. `;
39924
+ case TextNodeType.orderedListRomanLower:
39925
+ return `${this.toRoman(index).toLowerCase()}. `;
39926
+ case TextNodeType.letteredList:
39927
+ return `${this.toLetter(index)}. `;
39928
+ case TextNodeType.letteredListLower:
39929
+ return `${this.toLetter(index).toLowerCase()}. `;
39930
+ case TextNodeType.noBulletList:
39931
+ default:
39932
+ return "";
39933
+ }
39934
+ }
39935
+ toRoman(num) {
39936
+ const romanNumerals = [
39937
+ [1e3, "M"],
39938
+ [900, "CM"],
39939
+ [500, "D"],
39940
+ [400, "CD"],
39941
+ [100, "C"],
39942
+ [90, "XC"],
39943
+ [50, "L"],
39944
+ [40, "XL"],
39945
+ [10, "X"],
39946
+ [9, "IX"],
39947
+ [5, "V"],
39948
+ [4, "IV"],
39949
+ [1, "I"]
39950
+ ];
39951
+ let result = "";
39952
+ let remaining = num;
39953
+ for (const [value, numeral] of romanNumerals) {
39954
+ while (remaining >= value) {
39955
+ result += numeral;
39956
+ remaining -= value;
39957
+ }
39958
+ }
39959
+ return result;
39960
+ }
39961
+ toLetter(num) {
39962
+ let result = "";
39963
+ let remaining = num;
39964
+ while (remaining > 0) {
39965
+ remaining--;
39966
+ result = String.fromCharCode(65 + remaining % 26) + result;
39967
+ remaining = Math.floor(remaining / 26);
39968
+ }
39969
+ return result;
39970
+ }
39971
+ textWithMarks(node) {
39972
+ const { text, marks } = node;
39973
+ const parts = [];
39974
+ const linkMark = marks?.find((m) => m.type === "link");
39975
+ const href = linkMark?.attrs?.href;
39976
+ if (text && href && text !== href) {
39977
+ const hrefBare = href.replace(/^https?:\/\//, "");
39978
+ if (text.includes(hrefBare)) {
39979
+ parts.push(text.replace(hrefBare, href));
39980
+ } else if (text.includes(href)) {
39981
+ parts.push(text);
39982
+ } else {
39983
+ parts.push(`${text} ${href}`);
39984
+ }
39985
+ } else if (text) {
39986
+ parts.push(text);
39987
+ } else if (href) {
39988
+ parts.push(href);
39989
+ }
39990
+ if (marks) {
39991
+ for (const mark of marks) {
39992
+ if (mark.type === "footnote") {
39993
+ const footnote = mark;
39994
+ if (footnote.attrs?.content) {
39995
+ const footnoteText = footnote.attrs.content.map((c) => this.textNodeToPlainText(c)).join("");
39996
+ if (footnoteText) parts.push(footnoteText);
39997
+ }
39998
+ }
39999
+ }
40000
+ }
40001
+ return parts.join(" ");
40002
+ }
40003
+ };
40004
+
39746
40005
  // src/info/ConfigBuilder.ts
39747
40006
  import path3 from "path";
39748
40007
  import { Enum as Enum22 } from "@ncoderz/superenum";
@@ -40452,7 +40711,25 @@ var Output = {
40452
40711
  /**
40453
40712
  * Output AST as a plain JS object, or a file
40454
40713
  */
40455
- ast: "ast"
40714
+ ast: "ast",
40715
+ /**
40716
+ * Output plain text as a string, or a file
40717
+ */
40718
+ text: "text"
40719
+ };
40720
/**
 * Input formats that can be named explicitly instead of relying on
 * automatic detection. Each key maps to an identical string value.
 */
var InputFormat = {
  /** Input is bitmark */
  bitmark: "bitmark",
  /** Input is bitmarkText */
  bitmarkText: "bitmarkText",
  /** Input is plain text */
  plainText: "plainText"
};
40457
40734
  var BitmarkParserGenerator = class {
40458
40735
  constructor() {
@@ -40554,6 +40831,12 @@ var BitmarkParserGenerator = class {
40554
40831
  * - input(JSON/AST) ==> output(bitmark)
40555
40832
  * - input(bitmark) ==> output(JSON)
40556
40833
  *
40834
+ * Output type can be overridden to one of the following:
40835
+ * - bitmark: output bitmark string
40836
+ * - json: output JSON as a plain JS object, or a file
40837
+ * - ast: output AST as a plain JS object, or a file
40838
+ * - text: output plain text as a string, or a file
40839
+ *
40557
40840
  * By default, the result is returned as a string for bitmark, or a plain JS object for JSON/AST.
40558
40841
  *
40559
40842
  * The options can be used to write the output to a file and to set conversion options or override defaults.
@@ -40576,6 +40859,7 @@ var BitmarkParserGenerator = class {
40576
40859
  const outputBitmark = outputFormat === Output.bitmark;
40577
40860
  const outputJson = outputFormat === Output.json;
40578
40861
  const outputAst = outputFormat === Output.ast;
40862
+ const outputText = outputFormat === Output.text;
40579
40863
  const bitmarkParserType = BitmarkParserType.peggy;
40580
40864
  let inStr = input;
40581
40865
  const inputIsString = typeof input === "string";
@@ -40622,6 +40906,22 @@ var BitmarkParserGenerator = class {
40622
40906
  }
40623
40907
  }
40624
40908
  };
40909
+ const bitmarkToText = (bitmarkStr) => {
40910
+ ast = this.bitmarkParser.toAst(bitmarkStr, {
40911
+ parserType: bitmarkParserType
40912
+ });
40913
+ const jsonGenerator = new JsonObjectGenerator(opts);
40914
+ const json = jsonGenerator.generateSync(ast);
40915
+ const textGenerator = new PlainTextGenerator();
40916
+ const str = textGenerator.generate(json);
40917
+ if (opts.outputFile) {
40918
+ fs4.writeFileSync(opts.outputFile, str, {
40919
+ encoding: "utf8"
40920
+ });
40921
+ } else {
40922
+ res = str;
40923
+ }
40924
+ };
40625
40925
  const astToBitmark = (astJson) => {
40626
40926
  if (opts.outputFile) {
40627
40927
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40644,6 +40944,19 @@ var BitmarkParserGenerator = class {
40644
40944
  res = this.jsonStringifyPrettify(json, jsonOptions);
40645
40945
  }
40646
40946
  };
40947
+ const astToText = (astJson) => {
40948
+ const jsonGenerator = new JsonObjectGenerator(opts);
40949
+ const json = jsonGenerator.generateSync(astJson);
40950
+ const textGenerator = new PlainTextGenerator();
40951
+ const str = textGenerator.generate(json);
40952
+ if (opts.outputFile) {
40953
+ fs4.writeFileSync(opts.outputFile, str, {
40954
+ encoding: "utf8"
40955
+ });
40956
+ } else {
40957
+ res = str;
40958
+ }
40959
+ };
40647
40960
  const jsonToBitmark = (astJson) => {
40648
40961
  if (opts.outputFile) {
40649
40962
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40656,6 +40969,19 @@ var BitmarkParserGenerator = class {
40656
40969
  const jsonToAst = (astJson) => {
40657
40970
  res = this.jsonStringifyPrettify(astJson, jsonOptions);
40658
40971
  };
40972
+ const jsonToText = (astJson) => {
40973
+ const jsonGenerator = new JsonObjectGenerator(opts);
40974
+ const json = jsonGenerator.generateSync(astJson);
40975
+ const textGenerator = new PlainTextGenerator();
40976
+ const str = textGenerator.generate(json);
40977
+ if (opts.outputFile) {
40978
+ fs4.writeFileSync(opts.outputFile, str, {
40979
+ encoding: "utf8"
40980
+ });
40981
+ } else {
40982
+ res = str;
40983
+ }
40984
+ };
40659
40985
  const jsonToJson = (astJson) => {
40660
40986
  astToJson(astJson);
40661
40987
  };
@@ -40664,6 +40990,8 @@ var BitmarkParserGenerator = class {
40664
40990
  bitmarkToBitmark(inStr);
40665
40991
  } else if (outputAst) {
40666
40992
  bitmarkToAst(inStr);
40993
+ } else if (outputText) {
40994
+ bitmarkToText(inStr);
40667
40995
  } else {
40668
40996
  bitmarkToJson(inStr);
40669
40997
  }
@@ -40673,6 +41001,8 @@ var BitmarkParserGenerator = class {
40673
41001
  astToAst(ast);
40674
41002
  } else if (outputJson) {
40675
41003
  astToJson(ast);
41004
+ } else if (outputText) {
41005
+ astToText(ast);
40676
41006
  } else {
40677
41007
  astToBitmark(ast);
40678
41008
  }
@@ -40682,6 +41012,8 @@ var BitmarkParserGenerator = class {
40682
41012
  jsonToJson(ast);
40683
41013
  } else if (outputAst) {
40684
41014
  jsonToAst(ast);
41015
+ } else if (outputText) {
41016
+ jsonToText(ast);
40685
41017
  } else {
40686
41018
  jsonToBitmark(ast);
40687
41019
  }
@@ -40810,7 +41142,7 @@ var BitmarkParserGenerator = class {
40810
41142
  return res;
40811
41143
  }
40812
41144
  /**
40813
- * Convert bitmark text from JSON, or JSON to bitmark text.
41145
+ * Convert bitmark text to JSON, or JSON to bitmark text.
40814
41146
  *
40815
41147
  * Input type is detected automatically and may be:
40816
41148
  * - string: bitmark text or JSON
@@ -40884,6 +41216,54 @@ var BitmarkParserGenerator = class {
40884
41216
  }
40885
41217
  return res;
40886
41218
  }
41219
+ extractPlainText(input, options) {
41220
+ const dataIn = input;
41221
+ const inputFormat = options?.inputFormat;
41222
+ const isString2 = typeof input === "string";
41223
+ let data;
41224
+ const parseAutomatically = () => {
41225
+ let dataOut = dataIn;
41226
+ if (typeof dataIn === "string") {
41227
+ try {
41228
+ dataOut = JSON.parse(dataIn);
41229
+ } catch (_e) {
41230
+ let isBitmark = false;
41231
+ const bitmarkData = this.convert(dataIn, {
41232
+ outputFormat: Output.json
41233
+ });
41234
+ if (bitmarkData.length > 0) {
41235
+ const isError = bitmarkData[0].bit.type === BitType._error;
41236
+ if (!isError) {
41237
+ isBitmark = true;
41238
+ dataOut = bitmarkData;
41239
+ }
41240
+ }
41241
+ if (!isBitmark) {
41242
+ dataOut = this.convertText(dataIn, {
41243
+ textFormat: TextFormat.bitmarkText
41244
+ });
41245
+ }
41246
+ }
41247
+ }
41248
+ return dataOut;
41249
+ };
41250
+ if (inputFormat === InputFormat.bitmark) {
41251
+ data = this.convert(dataIn, {
41252
+ outputFormat: Output.json
41253
+ });
41254
+ } else if (inputFormat === InputFormat.bitmarkText) {
41255
+ data = this.convertText(dataIn, {
41256
+ textFormat: TextFormat.bitmarkText
41257
+ });
41258
+ } else if (inputFormat === InputFormat.plainText) {
41259
+ if (isString2) data = String(input);
41260
+ } else {
41261
+ data = parseAutomatically();
41262
+ }
41263
+ const generator = new PlainTextGenerator();
41264
+ const res = generator.generate(data);
41265
+ return res;
41266
+ }
40887
41267
  /**
40888
41268
  * Breakscape bitmark text.
40889
41269
  *
@@ -40991,6 +41371,16 @@ var BitmarkParserGenerator = class {
40991
41371
  }
40992
41372
  return;
40993
41373
  }
41374
+ textAstToPlainText(textAst, _options) {
41375
+ const textGenerator = new TextGenerator(BitmarkVersion.v3, {
41376
+ //
41377
+ });
41378
+ const res = textGenerator.generateSync(textAst, TextFormat.bitmarkText, TextLocation.body, {
41379
+ noBreakscaping: true,
41380
+ noMarkup: true
41381
+ });
41382
+ return res;
41383
+ }
40994
41384
  /**
40995
41385
  * Get the supported bits as a formatted strings
40996
41386
  *
@@ -41060,6 +41450,7 @@ export {
41060
41450
  InfoFormat,
41061
41451
  InfoType,
41062
41452
  Input,
41453
+ InputFormat,
41063
41454
  JsonFileGenerator,
41064
41455
  JsonGenerator,
41065
41456
  JsonParser,