@gmb/bitmark-parser-generator 5.15.0 → 5.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -48,6 +48,7 @@ __export(index_exports, {
48
48
  InfoFormat: () => InfoFormat,
49
49
  InfoType: () => InfoType,
50
50
  Input: () => Input,
51
+ InputFormat: () => InputFormat,
51
52
  JsonFileGenerator: () => JsonFileGenerator,
52
53
  JsonGenerator: () => JsonGenerator,
53
54
  JsonParser: () => JsonParser,
@@ -10981,7 +10982,7 @@ var instance2 = new Config();
10981
10982
  // src/generated/package_info.ts
10982
10983
  var PACKAGE_INFO = {
10983
10984
  "name": "@gmb/bitmark-parser-generator",
10984
- "version": "5.15.0",
10985
+ "version": "5.17.0",
10985
10986
  "author": "Get More Brain Ltd",
10986
10987
  "license": "ISC",
10987
10988
  "description": "A bitmark parser and generator using Peggy.js"
@@ -11238,6 +11239,16 @@ var TextMarkType = {
11238
11239
  light: "light",
11239
11240
  italic: "italic",
11240
11241
  highlight: "highlight",
11242
+ highlightOrange: "highlightOrange",
11243
+ highlightYellow: "highlightYellow",
11244
+ highlightGreen: "highlightGreen",
11245
+ highlightBlue: "highlightBlue",
11246
+ highlightPurple: "highlightPurple",
11247
+ highlightPink: "highlightPink",
11248
+ highlightBrown: "highlightBrown",
11249
+ highlightBlack: "highlightBlack",
11250
+ highlightWhite: "highlightWhite",
11251
+ highlightGray: "highlightGray",
11241
11252
  // Inline only styles
11242
11253
  strike: "strike",
11243
11254
  subscript: "subscript",
@@ -12762,6 +12773,16 @@ var STANDARD_MARK_TYPES = [
12762
12773
  TextMarkType.highlight
12763
12774
  ];
12764
12775
  var INLINE_MARK_TYPES = [
12776
+ TextMarkType.highlightOrange,
12777
+ TextMarkType.highlightYellow,
12778
+ TextMarkType.highlightGreen,
12779
+ TextMarkType.highlightBlue,
12780
+ TextMarkType.highlightPurple,
12781
+ TextMarkType.highlightPink,
12782
+ TextMarkType.highlightBrown,
12783
+ TextMarkType.highlightBlack,
12784
+ TextMarkType.highlightWhite,
12785
+ TextMarkType.highlightGray,
12765
12786
  TextMarkType.strike,
12766
12787
  TextMarkType.subscript,
12767
12788
  TextMarkType.superscript,
@@ -39804,6 +39825,265 @@ var JsonFileGenerator = class {
39804
39825
  }
39805
39826
  };
39806
39827
 
39828
+ // src/generator/plainText/PlainTextGenerator.ts
39829
+ var TEXT_NODE_TYPES = new Set(Object.values(TextNodeType));
39830
+ var PlainTextGenerator = class {
39831
+ /**
39832
+ * Generate plain text from a string or JSON object.
39833
+ *
39834
+ * @param input - A string (plain or JSON-encoded) or a parsed JSON value.
39835
+ * @returns The extracted plain text.
39836
+ */
39837
+ generate(input) {
39838
+ let data = input;
39839
+ if (typeof data === "string") {
39840
+ try {
39841
+ data = JSON.parse(data);
39842
+ } catch (_e) {
39843
+ return data;
39844
+ }
39845
+ }
39846
+ return this.walk(data).trim();
39847
+ }
39848
+ // ---------------------------------------------------------------------------
39849
+ // Private helpers
39850
+ // ---------------------------------------------------------------------------
39851
+ walk(value) {
39852
+ if (value == null) return "";
39853
+ if (typeof value === "string") return value;
39854
+ if (typeof value !== "object") return "";
39855
+ if (Array.isArray(value)) {
39856
+ if (value.length === 0) return "";
39857
+ if (this.isTextAst(value)) {
39858
+ return this.textAstToPlainText(value);
39859
+ }
39860
+ return value.map((v) => this.walk(v)).filter(Boolean).join("\n");
39861
+ }
39862
+ const obj = value;
39863
+ if (this.isTextNode(obj)) {
39864
+ return this.textNodeToPlainText(obj);
39865
+ }
39866
+ if (this.isBitWrapper(obj)) {
39867
+ return this.walk(obj["bit"]);
39868
+ }
39869
+ const parts = [];
39870
+ for (const val of Object.values(obj)) {
39871
+ if (val == null || typeof val !== "object") continue;
39872
+ const text = this.walk(val);
39873
+ if (text) parts.push(text);
39874
+ }
39875
+ return parts.join("\n");
39876
+ }
39877
+ // -- Type guards -----------------------------------------------------------
39878
+ isTextNode(obj) {
39879
+ return typeof obj["type"] === "string" && TEXT_NODE_TYPES.has(obj["type"]);
39880
+ }
39881
+ isTextAst(arr) {
39882
+ const first = arr[0];
39883
+ return typeof first === "object" && first !== null && !Array.isArray(first) && this.isTextNode(first);
39884
+ }
39885
+ isBitWrapper(obj) {
39886
+ return "bit" in obj && typeof obj["bit"] === "object" && obj["bit"] !== null && !Array.isArray(obj["bit"]);
39887
+ }
39888
+ // -- TextNode extraction ---------------------------------------------------
39889
+ textAstToPlainText(ast) {
39890
+ return ast.map((node) => this.textNodeToPlainText(node)).join("\n");
39891
+ }
39892
+ textNodeToPlainText(node) {
39893
+ const { type, text, content } = node;
39894
+ switch (type) {
39895
+ case TextNodeType.text:
39896
+ return this.textWithMarks(node);
39897
+ case TextNodeType.hardBreak:
39898
+ return "\n";
39899
+ // Block elements whose children are joined without extra separator
39900
+ case TextNodeType.paragraph:
39901
+ case TextNodeType.heading:
39902
+ case TextNodeType.section:
39903
+ case TextNodeType.gap:
39904
+ case TextNodeType.select:
39905
+ case TextNodeType.highlight:
39906
+ case TextNodeType.mark:
39907
+ case TextNodeType.codeBlock:
39908
+ return content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
39909
+ // List items are handled by listToPlainText with indent context
39910
+ case TextNodeType.listItem:
39911
+ return content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
39912
+ // Task item – handled by taskListToPlainText, but fallback if encountered standalone
39913
+ case TextNodeType.taskItem: {
39914
+ const checked = node.attrs?.checked ?? false;
39915
+ const prefix = checked ? "[x] " : "[ ] ";
39916
+ const itemText = content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
39917
+ return `${prefix}${itemText}`;
39918
+ }
39919
+ // List containers – rendered with indent-aware helper
39920
+ case TextNodeType.noBulletList:
39921
+ case TextNodeType.bulletList:
39922
+ case TextNodeType.orderedList:
39923
+ case TextNodeType.orderedListRoman:
39924
+ case TextNodeType.orderedListRomanLower:
39925
+ case TextNodeType.letteredList:
39926
+ case TextNodeType.letteredListLower:
39927
+ return this.listToPlainText(node, 0);
39928
+ // Task list – rendered with indent-aware helper
39929
+ case TextNodeType.taskList:
39930
+ return this.taskListToPlainText(node, 0);
39931
+ // Images – return alt text when available
39932
+ case TextNodeType.image:
39933
+ case TextNodeType.imageInline: {
39934
+ const attrs = node.attrs;
39935
+ return attrs?.alt ?? "";
39936
+ }
39937
+ // LaTeX – return the formula source
39938
+ case TextNodeType.latex: {
39939
+ const latexAttrs = node.attrs;
39940
+ return latexAttrs?.formula ?? "";
39941
+ }
39942
+ default:
39943
+ return content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
39944
+ }
39945
+ }
39946
+ listToPlainText(node, depth) {
39947
+ const { type, content } = node;
39948
+ if (!content || content.length === 0) return "";
39949
+ const indent = " ".repeat(depth);
39950
+ const start = node.attrs?.start ?? 1;
39951
+ const displayStart = start < 1 ? start + 1 : start;
39952
+ return content.map((child, i) => {
39953
+ const { inline, nested } = this.splitListItemContent(child, depth);
39954
+ const prefix = this.listItemPrefix(type, displayStart + i);
39955
+ const line = `${indent}${prefix}${inline}`;
39956
+ return nested ? `${line}
39957
+ ${nested}` : line;
39958
+ }).join("\n");
39959
+ }
39960
+ taskListToPlainText(node, depth) {
39961
+ const { content } = node;
39962
+ if (!content || content.length === 0) return "";
39963
+ const indent = " ".repeat(depth);
39964
+ return content.map((child) => {
39965
+ const checked = child.attrs?.checked ?? false;
39966
+ const prefix = checked ? "[x] " : "[ ] ";
39967
+ const { inline, nested } = this.splitListItemContent(child, depth);
39968
+ const line = `${indent}${prefix}${inline}`;
39969
+ return nested ? `${line}
39970
+ ${nested}` : line;
39971
+ }).join("\n");
39972
+ }
39973
+ splitListItemContent(item, depth) {
39974
+ const children = item.content ?? [];
39975
+ const inlineParts = [];
39976
+ const nestedParts = [];
39977
+ for (const child of children) {
39978
+ if (this.isListType(child.type)) {
39979
+ nestedParts.push(this.renderNestedList(child, depth + 1));
39980
+ } else {
39981
+ inlineParts.push(this.textNodeToPlainText(child));
39982
+ }
39983
+ }
39984
+ return {
39985
+ inline: inlineParts.join(""),
39986
+ nested: nestedParts.join("\n")
39987
+ };
39988
+ }
39989
+ isListType(type) {
39990
+ return type === TextNodeType.bulletList || type === TextNodeType.orderedList || type === TextNodeType.orderedListRoman || type === TextNodeType.orderedListRomanLower || type === TextNodeType.letteredList || type === TextNodeType.letteredListLower || type === TextNodeType.noBulletList || type === TextNodeType.taskList;
39991
+ }
39992
+ renderNestedList(node, depth) {
39993
+ if (node.type === TextNodeType.taskList) {
39994
+ return this.taskListToPlainText(node, depth);
39995
+ }
39996
+ return this.listToPlainText(node, depth);
39997
+ }
39998
+ listItemPrefix(listType, index) {
39999
+ switch (listType) {
40000
+ case TextNodeType.bulletList:
40001
+ return "\u2022 ";
40002
+ case TextNodeType.orderedList:
40003
+ return `${index}. `;
40004
+ case TextNodeType.orderedListRoman:
40005
+ return `${this.toRoman(index)}. `;
40006
+ case TextNodeType.orderedListRomanLower:
40007
+ return `${this.toRoman(index).toLowerCase()}. `;
40008
+ case TextNodeType.letteredList:
40009
+ return `${this.toLetter(index)}. `;
40010
+ case TextNodeType.letteredListLower:
40011
+ return `${this.toLetter(index).toLowerCase()}. `;
40012
+ case TextNodeType.noBulletList:
40013
+ default:
40014
+ return "";
40015
+ }
40016
+ }
40017
+ toRoman(num) {
40018
+ const romanNumerals = [
40019
+ [1e3, "M"],
40020
+ [900, "CM"],
40021
+ [500, "D"],
40022
+ [400, "CD"],
40023
+ [100, "C"],
40024
+ [90, "XC"],
40025
+ [50, "L"],
40026
+ [40, "XL"],
40027
+ [10, "X"],
40028
+ [9, "IX"],
40029
+ [5, "V"],
40030
+ [4, "IV"],
40031
+ [1, "I"]
40032
+ ];
40033
+ let result = "";
40034
+ let remaining = num;
40035
+ for (const [value, numeral] of romanNumerals) {
40036
+ while (remaining >= value) {
40037
+ result += numeral;
40038
+ remaining -= value;
40039
+ }
40040
+ }
40041
+ return result;
40042
+ }
40043
+ toLetter(num) {
40044
+ let result = "";
40045
+ let remaining = num;
40046
+ while (remaining > 0) {
40047
+ remaining--;
40048
+ result = String.fromCharCode(65 + remaining % 26) + result;
40049
+ remaining = Math.floor(remaining / 26);
40050
+ }
40051
+ return result;
40052
+ }
40053
+ textWithMarks(node) {
40054
+ const { text, marks } = node;
40055
+ const parts = [];
40056
+ const linkMark = marks?.find((m) => m.type === "link");
40057
+ const href = linkMark?.attrs?.href;
40058
+ if (text && href && text !== href) {
40059
+ const hrefBare = href.replace(/^https?:\/\//, "");
40060
+ if (text.includes(hrefBare)) {
40061
+ parts.push(text.replace(hrefBare, href));
40062
+ } else if (text.includes(href)) {
40063
+ parts.push(text);
40064
+ } else {
40065
+ parts.push(`${text} ${href}`);
40066
+ }
40067
+ } else if (text) {
40068
+ parts.push(text);
40069
+ } else if (href) {
40070
+ parts.push(href);
40071
+ }
40072
+ if (marks) {
40073
+ for (const mark of marks) {
40074
+ if (mark.type === "footnote") {
40075
+ const footnote = mark;
40076
+ if (footnote.attrs?.content) {
40077
+ const footnoteText = footnote.attrs.content.map((c) => this.textNodeToPlainText(c)).join("");
40078
+ if (footnoteText) parts.push(footnoteText);
40079
+ }
40080
+ }
40081
+ }
40082
+ }
40083
+ return parts.join(" ");
40084
+ }
40085
+ };
40086
+
39807
40087
  // src/info/ConfigBuilder.ts
39808
40088
  var import_node_path3 = __toESM(require("path"), 1);
39809
40089
  var import_superenum50 = require("@ncoderz/superenum");
@@ -40513,7 +40793,25 @@ var Output = {
40513
40793
  /**
40514
40794
  * Output AST as a plain JS object, or a file
40515
40795
  */
40516
- ast: "ast"
40796
+ ast: "ast",
40797
+ /**
40798
+ * Output plain text as a string, or a file
40799
+ */
40800
+ text: "text"
40801
+ };
40802
+ var InputFormat = {
40803
+ /**
40804
+ * Input is bitmark
40805
+ */
40806
+ bitmark: "bitmark",
40807
+ /**
40808
+ * Input is bitmarkText
40809
+ */
40810
+ bitmarkText: "bitmarkText",
40811
+ /**
40812
+ * Input is plain text
40813
+ */
40814
+ plainText: "plainText"
40517
40815
  };
40518
40816
  var BitmarkParserGenerator = class {
40519
40817
  constructor() {
@@ -40615,6 +40913,12 @@ var BitmarkParserGenerator = class {
40615
40913
  * - input(JSON/AST) ==> output(bitmark)
40616
40914
  * - input(bitmark) ==> output(JSON)
40617
40915
  *
40916
+ * Output type can be overridden to one of the following:
40917
+ * - bitmark: output bitmark string
40918
+ * - json: output JSON as a plain JS object, or a file
40919
+ * - ast: output AST as a plain JS object, or a file
40920
+ * - text: output plain text as a string, or a file
40921
+ *
40618
40922
  * By default, the result is returned as a string for bitmark, or a plain JS object for JSON/AST.
40619
40923
  *
40620
40924
  * The options can be used to write the output to a file and to set conversion options or override defaults.
@@ -40637,6 +40941,7 @@ var BitmarkParserGenerator = class {
40637
40941
  const outputBitmark = outputFormat === Output.bitmark;
40638
40942
  const outputJson = outputFormat === Output.json;
40639
40943
  const outputAst = outputFormat === Output.ast;
40944
+ const outputText = outputFormat === Output.text;
40640
40945
  const bitmarkParserType = BitmarkParserType.peggy;
40641
40946
  let inStr = input;
40642
40947
  const inputIsString = typeof input === "string";
@@ -40683,6 +40988,22 @@ var BitmarkParserGenerator = class {
40683
40988
  }
40684
40989
  }
40685
40990
  };
40991
+ const bitmarkToText = (bitmarkStr) => {
40992
+ ast = this.bitmarkParser.toAst(bitmarkStr, {
40993
+ parserType: bitmarkParserType
40994
+ });
40995
+ const jsonGenerator = new JsonObjectGenerator(opts);
40996
+ const json = jsonGenerator.generateSync(ast);
40997
+ const textGenerator = new PlainTextGenerator();
40998
+ const str = textGenerator.generate(json);
40999
+ if (opts.outputFile) {
41000
+ import_fs_extra4.default.writeFileSync(opts.outputFile, str, {
41001
+ encoding: "utf8"
41002
+ });
41003
+ } else {
41004
+ res = str;
41005
+ }
41006
+ };
40686
41007
  const astToBitmark = (astJson) => {
40687
41008
  if (opts.outputFile) {
40688
41009
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40705,6 +41026,19 @@ var BitmarkParserGenerator = class {
40705
41026
  res = this.jsonStringifyPrettify(json, jsonOptions);
40706
41027
  }
40707
41028
  };
41029
+ const astToText = (astJson) => {
41030
+ const jsonGenerator = new JsonObjectGenerator(opts);
41031
+ const json = jsonGenerator.generateSync(astJson);
41032
+ const textGenerator = new PlainTextGenerator();
41033
+ const str = textGenerator.generate(json);
41034
+ if (opts.outputFile) {
41035
+ import_fs_extra4.default.writeFileSync(opts.outputFile, str, {
41036
+ encoding: "utf8"
41037
+ });
41038
+ } else {
41039
+ res = str;
41040
+ }
41041
+ };
40708
41042
  const jsonToBitmark = (astJson) => {
40709
41043
  if (opts.outputFile) {
40710
41044
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40717,6 +41051,19 @@ var BitmarkParserGenerator = class {
40717
41051
  const jsonToAst = (astJson) => {
40718
41052
  res = this.jsonStringifyPrettify(astJson, jsonOptions);
40719
41053
  };
41054
+ const jsonToText = (astJson) => {
41055
+ const jsonGenerator = new JsonObjectGenerator(opts);
41056
+ const json = jsonGenerator.generateSync(astJson);
41057
+ const textGenerator = new PlainTextGenerator();
41058
+ const str = textGenerator.generate(json);
41059
+ if (opts.outputFile) {
41060
+ import_fs_extra4.default.writeFileSync(opts.outputFile, str, {
41061
+ encoding: "utf8"
41062
+ });
41063
+ } else {
41064
+ res = str;
41065
+ }
41066
+ };
40720
41067
  const jsonToJson = (astJson) => {
40721
41068
  astToJson(astJson);
40722
41069
  };
@@ -40725,6 +41072,8 @@ var BitmarkParserGenerator = class {
40725
41072
  bitmarkToBitmark(inStr);
40726
41073
  } else if (outputAst) {
40727
41074
  bitmarkToAst(inStr);
41075
+ } else if (outputText) {
41076
+ bitmarkToText(inStr);
40728
41077
  } else {
40729
41078
  bitmarkToJson(inStr);
40730
41079
  }
@@ -40734,6 +41083,8 @@ var BitmarkParserGenerator = class {
40734
41083
  astToAst(ast);
40735
41084
  } else if (outputJson) {
40736
41085
  astToJson(ast);
41086
+ } else if (outputText) {
41087
+ astToText(ast);
40737
41088
  } else {
40738
41089
  astToBitmark(ast);
40739
41090
  }
@@ -40743,6 +41094,8 @@ var BitmarkParserGenerator = class {
40743
41094
  jsonToJson(ast);
40744
41095
  } else if (outputAst) {
40745
41096
  jsonToAst(ast);
41097
+ } else if (outputText) {
41098
+ jsonToText(ast);
40746
41099
  } else {
40747
41100
  jsonToBitmark(ast);
40748
41101
  }
@@ -40871,7 +41224,7 @@ var BitmarkParserGenerator = class {
40871
41224
  return res;
40872
41225
  }
40873
41226
  /**
40874
- * Convert bitmark text from JSON, or JSON to bitmark text.
41227
+ * Convert bitmark text to JSON, or JSON to bitmark text.
40875
41228
  *
40876
41229
  * Input type is detected automatically and may be:
40877
41230
  * - string: bitmark text or JSON
@@ -40945,6 +41298,54 @@ var BitmarkParserGenerator = class {
40945
41298
  }
40946
41299
  return res;
40947
41300
  }
41301
+ extractPlainText(input, options) {
41302
+ const dataIn = input;
41303
+ const inputFormat = options?.inputFormat;
41304
+ const isString2 = typeof input === "string";
41305
+ let data;
41306
+ const parseAutomatically = () => {
41307
+ let dataOut = dataIn;
41308
+ if (typeof dataIn === "string") {
41309
+ try {
41310
+ dataOut = JSON.parse(dataIn);
41311
+ } catch (_e) {
41312
+ let isBitmark = false;
41313
+ const bitmarkData = this.convert(dataIn, {
41314
+ outputFormat: Output.json
41315
+ });
41316
+ if (bitmarkData.length > 0) {
41317
+ const isError = bitmarkData[0].bit.type === BitType._error;
41318
+ if (!isError) {
41319
+ isBitmark = true;
41320
+ dataOut = bitmarkData;
41321
+ }
41322
+ }
41323
+ if (!isBitmark) {
41324
+ dataOut = this.convertText(dataIn, {
41325
+ textFormat: TextFormat.bitmarkText
41326
+ });
41327
+ }
41328
+ }
41329
+ }
41330
+ return dataOut;
41331
+ };
41332
+ if (inputFormat === InputFormat.bitmark) {
41333
+ data = this.convert(dataIn, {
41334
+ outputFormat: Output.json
41335
+ });
41336
+ } else if (inputFormat === InputFormat.bitmarkText) {
41337
+ data = this.convertText(dataIn, {
41338
+ textFormat: TextFormat.bitmarkText
41339
+ });
41340
+ } else if (inputFormat === InputFormat.plainText) {
41341
+ if (isString2) data = String(input);
41342
+ } else {
41343
+ data = parseAutomatically();
41344
+ }
41345
+ const generator = new PlainTextGenerator();
41346
+ const res = generator.generate(data);
41347
+ return res;
41348
+ }
40948
41349
  /**
40949
41350
  * Breakscape bitmark text.
40950
41351
  *
@@ -41052,6 +41453,16 @@ var BitmarkParserGenerator = class {
41052
41453
  }
41053
41454
  return;
41054
41455
  }
41456
+ textAstToPlainText(textAst, _options) {
41457
+ const textGenerator = new TextGenerator(BitmarkVersion.v3, {
41458
+ //
41459
+ });
41460
+ const res = textGenerator.generateSync(textAst, TextFormat.bitmarkText, TextLocation.body, {
41461
+ noBreakscaping: true,
41462
+ noMarkup: true
41463
+ });
41464
+ return res;
41465
+ }
41055
41466
  /**
41056
41467
  * Get the supported bits as a formatted strings
41057
41468
  *
@@ -41122,6 +41533,7 @@ init();
41122
41533
  InfoFormat,
41123
41534
  InfoType,
41124
41535
  Input,
41536
+ InputFormat,
41125
41537
  JsonFileGenerator,
41126
41538
  JsonGenerator,
41127
41539
  JsonParser,