@gmb/bitmark-parser-generator 5.15.0 → 5.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -10920,7 +10920,7 @@ var instance2 = new Config();
10920
10920
  // src/generated/package_info.ts
10921
10921
  var PACKAGE_INFO = {
10922
10922
  "name": "@gmb/bitmark-parser-generator",
10923
- "version": "5.15.0",
10923
+ "version": "5.17.0",
10924
10924
  "author": "Get More Brain Ltd",
10925
10925
  "license": "ISC",
10926
10926
  "description": "A bitmark parser and generator using Peggy.js"
@@ -11177,6 +11177,16 @@ var TextMarkType = {
11177
11177
  light: "light",
11178
11178
  italic: "italic",
11179
11179
  highlight: "highlight",
11180
+ highlightOrange: "highlightOrange",
11181
+ highlightYellow: "highlightYellow",
11182
+ highlightGreen: "highlightGreen",
11183
+ highlightBlue: "highlightBlue",
11184
+ highlightPurple: "highlightPurple",
11185
+ highlightPink: "highlightPink",
11186
+ highlightBrown: "highlightBrown",
11187
+ highlightBlack: "highlightBlack",
11188
+ highlightWhite: "highlightWhite",
11189
+ highlightGray: "highlightGray",
11180
11190
  // Inline only styles
11181
11191
  strike: "strike",
11182
11192
  subscript: "subscript",
@@ -12701,6 +12711,16 @@ var STANDARD_MARK_TYPES = [
12701
12711
  TextMarkType.highlight
12702
12712
  ];
12703
12713
  var INLINE_MARK_TYPES = [
12714
+ TextMarkType.highlightOrange,
12715
+ TextMarkType.highlightYellow,
12716
+ TextMarkType.highlightGreen,
12717
+ TextMarkType.highlightBlue,
12718
+ TextMarkType.highlightPurple,
12719
+ TextMarkType.highlightPink,
12720
+ TextMarkType.highlightBrown,
12721
+ TextMarkType.highlightBlack,
12722
+ TextMarkType.highlightWhite,
12723
+ TextMarkType.highlightGray,
12704
12724
  TextMarkType.strike,
12705
12725
  TextMarkType.subscript,
12706
12726
  TextMarkType.superscript,
@@ -39743,6 +39763,265 @@ var JsonFileGenerator = class {
39743
39763
  }
39744
39764
  };
39745
39765
 
39766
+ // src/generator/plainText/PlainTextGenerator.ts
39767
+ var TEXT_NODE_TYPES = new Set(Object.values(TextNodeType));
39768
/**
 * Extracts human-readable plain text from bitmark JSON, text ASTs, or single
 * text nodes by walking the value recursively.
 */
var PlainTextGenerator = class {
  /**
   * Generate plain text from a string or JSON object.
   *
   * A string is first tried as JSON. When it does not parse, or parses to a
   * number, boolean or null (e.g. the plain strings "42" or "true"), the
   * original string is returned unchanged, since such input carries no
   * structure to walk. Walked results are trimmed.
   *
   * @param input - A string (plain or JSON-encoded) or a parsed JSON value.
   * @returns The extracted plain text.
   */
  generate(input) {
    let data = input;
    if (typeof data === "string") {
      let parsed;
      try {
        parsed = JSON.parse(data);
      } catch (_e) {
        return data;
      }
      const isWalkable = typeof parsed === "string" || (typeof parsed === "object" && parsed !== null);
      // Without this guard "42" / "true" / "null" would be walked as primitives and yield "".
      if (!isWalkable) return data;
      data = parsed;
    }
    return this.walk(data).trim();
  }
  // ---------------------------------------------------------------------------
  // Private helpers
  // ---------------------------------------------------------------------------
  /**
   * Recursively collect text from an arbitrary JSON value.
   *
   * Strings are returned as-is, other primitives yield "". Arrays are rendered
   * as a text AST when their first element is a text node, otherwise each
   * element is walked and the non-empty results are joined with newlines.
   * For plain objects only object-valued properties are followed; string
   * properties of a plain object are not collected.
   *
   * @param value - Any JSON value.
   * @returns The collected text (not trimmed).
   */
  walk(value) {
    if (value == null) return "";
    if (typeof value === "string") return value;
    if (typeof value !== "object") return "";
    if (Array.isArray(value)) {
      if (value.length === 0) return "";
      if (this.isTextAst(value)) return this.textAstToPlainText(value);
      return value.map((v) => this.walk(v)).filter(Boolean).join("\n");
    }
    if (this.isTextNode(value)) return this.textNodeToPlainText(value);
    if (this.isBitWrapper(value)) return this.walk(value["bit"]);
    const parts = [];
    for (const val of Object.values(value)) {
      if (val == null || typeof val !== "object") continue;
      const text = this.walk(val);
      if (text) parts.push(text);
    }
    return parts.join("\n");
  }
  // -- Type guards -----------------------------------------------------------
  /**
   * True when `obj.type` is a string contained in TEXT_NODE_TYPES.
   *
   * NOTE(review): any object with a matching `type` is treated as a text node,
   * which would include a non-text object whose type name happens to coincide
   * with a text node type - confirm no such clash exists.
   */
  isTextNode(obj) {
    return typeof obj["type"] === "string" && TEXT_NODE_TYPES.has(obj["type"]);
  }
  /** True when the first element of `arr` is a text node (only the first element is inspected). */
  isTextAst(arr) {
    const first = arr[0];
    return typeof first === "object" && first !== null && !Array.isArray(first) && this.isTextNode(first);
  }
  /** True when `obj` has a `bit` property holding a non-null, non-array object. */
  isBitWrapper(obj) {
    return "bit" in obj && typeof obj["bit"] === "object" && obj["bit"] !== null && !Array.isArray(obj["bit"]);
  }
  // -- TextNode extraction ---------------------------------------------------
  /** Render every top-level node of a text AST and join the results with newlines. */
  textAstToPlainText(ast) {
    return ast.map((node) => this.textNodeToPlainText(node)).join("\n");
  }
  /**
   * Concatenate the rendered children of a node without a separator, falling
   * back to the node's own `text` (or "") when it has no `content`.
   */
  childrenToPlainText(node) {
    const { text, content } = node;
    return content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
  }
  /** Checkbox prefix for a task item: "[x] " when `attrs.checked` is truthy, else "[ ] ". */
  taskPrefix(node) {
    return (node.attrs?.checked ?? false) ? "[x] " : "[ ] ";
  }
  /**
   * Render a single text node (and its subtree) to plain text.
   *
   * @param node - A text node with `type` and optional `text`, `content`, `attrs`, `marks`.
   * @returns The plain text for the node.
   */
  textNodeToPlainText(node) {
    switch (node.type) {
      case TextNodeType.text:
        return this.textWithMarks(node);
      case TextNodeType.hardBreak:
        return "\n";
      // Task item - normally handled by taskListToPlainText; fallback if encountered standalone
      case TextNodeType.taskItem:
        return `${this.taskPrefix(node)}${this.childrenToPlainText(node)}`;
      // List containers - rendered with indent-aware helper
      case TextNodeType.noBulletList:
      case TextNodeType.bulletList:
      case TextNodeType.orderedList:
      case TextNodeType.orderedListRoman:
      case TextNodeType.orderedListRomanLower:
      case TextNodeType.letteredList:
      case TextNodeType.letteredListLower:
        return this.listToPlainText(node, 0);
      // Task list - rendered with indent-aware helper
      case TextNodeType.taskList:
        return this.taskListToPlainText(node, 0);
      // Images - return alt text when available
      case TextNodeType.image:
      case TextNodeType.imageInline:
        return node.attrs?.alt ?? "";
      // LaTeX - return the formula source
      case TextNodeType.latex:
        return node.attrs?.formula ?? "";
      // paragraph, heading, section, gap, select, highlight, mark, codeBlock,
      // listItem and any other type: children are joined without a separator
      default:
        return this.childrenToPlainText(node);
    }
  }
  /**
   * Render a (non-task) list, one line per item, indented by `depth`.
   *
   * Numbering begins at `attrs.start` (default 1); a start below 1 is shifted
   * up by one, so a start of 0 is displayed as 1.
   *
   * @param node - The list node.
   * @param depth - Nesting depth; used as the indent width.
   * @returns The rendered list, or "" when the list has no items.
   */
  listToPlainText(node, depth) {
    const { type, content } = node;
    if (!content || content.length === 0) return "";
    const indent = " ".repeat(depth);
    const start = node.attrs?.start ?? 1;
    const displayStart = start < 1 ? start + 1 : start;
    return content.map((child, i) => {
      const { inline, nested } = this.splitListItemContent(child, depth);
      const line = `${indent}${this.listItemPrefix(type, displayStart + i)}${inline}`;
      return nested ? `${line}\n${nested}` : line;
    }).join("\n");
  }
  /**
   * Render a task list, one "[x] " / "[ ] " prefixed line per item, indented by `depth`.
   *
   * @param node - The task list node.
   * @param depth - Nesting depth; used as the indent width.
   * @returns The rendered list, or "" when the list has no items.
   */
  taskListToPlainText(node, depth) {
    const { content } = node;
    if (!content || content.length === 0) return "";
    const indent = " ".repeat(depth);
    return content.map((child) => {
      const { inline, nested } = this.splitListItemContent(child, depth);
      const line = `${indent}${this.taskPrefix(child)}${inline}`;
      return nested ? `${line}\n${nested}` : line;
    }).join("\n");
  }
  /**
   * Split a list item's children into its own inline text and its nested lists.
   *
   * @param item - The list item (or task item) node.
   * @param depth - Depth of the list containing the item; nested lists render at depth + 1.
   * @returns `inline` (children joined without separator) and `nested` (lists joined with newlines).
   */
  splitListItemContent(item, depth) {
    const inlineParts = [];
    const nestedParts = [];
    for (const child of item.content ?? []) {
      if (this.isListType(child.type)) {
        nestedParts.push(this.renderNestedList(child, depth + 1));
      } else {
        inlineParts.push(this.textNodeToPlainText(child));
      }
    }
    return {
      inline: inlineParts.join(""),
      nested: nestedParts.join("\n")
    };
  }
  /** True for every list container type, including task lists. */
  isListType(type) {
    return [
      TextNodeType.bulletList,
      TextNodeType.orderedList,
      TextNodeType.orderedListRoman,
      TextNodeType.orderedListRomanLower,
      TextNodeType.letteredList,
      TextNodeType.letteredListLower,
      TextNodeType.noBulletList,
      TextNodeType.taskList
    ].includes(type);
  }
  /** Render a nested list with the helper matching its type. */
  renderNestedList(node, depth) {
    return node.type === TextNodeType.taskList ? this.taskListToPlainText(node, depth) : this.listToPlainText(node, depth);
  }
  /**
   * Prefix placed before a list item's text.
   *
   * @param listType - Type of the containing list.
   * @param index - Displayed position of the item.
   * @returns A bullet, a number / roman numeral / letter followed by ". ", or "" for unbulleted lists.
   */
  listItemPrefix(listType, index) {
    switch (listType) {
      case TextNodeType.bulletList:
        return "\u2022 ";
      case TextNodeType.orderedList:
        return `${index}. `;
      case TextNodeType.orderedListRoman:
        return `${this.toRoman(index)}. `;
      case TextNodeType.orderedListRomanLower:
        return `${this.toRoman(index).toLowerCase()}. `;
      case TextNodeType.letteredList:
        return `${this.toLetter(index)}. `;
      case TextNodeType.letteredListLower:
        return `${this.toLetter(index).toLowerCase()}. `;
      case TextNodeType.noBulletList:
      default:
        return "";
    }
  }
  /** Convert a positive integer to an upper-case roman numeral; values below 1 yield "". */
  toRoman(num) {
    const romanNumerals = [
      { value: 1000, numeral: "M" },
      { value: 900, numeral: "CM" },
      { value: 500, numeral: "D" },
      { value: 400, numeral: "CD" },
      { value: 100, numeral: "C" },
      { value: 90, numeral: "XC" },
      { value: 50, numeral: "L" },
      { value: 40, numeral: "XL" },
      { value: 10, numeral: "X" },
      { value: 9, numeral: "IX" },
      { value: 5, numeral: "V" },
      { value: 4, numeral: "IV" },
      { value: 1, numeral: "I" }
    ];
    let result = "";
    let remaining = num;
    for (const { value, numeral } of romanNumerals) {
      while (remaining >= value) {
        result += numeral;
        remaining -= value;
      }
    }
    return result;
  }
  /** Convert a positive integer to upper-case letters (1 = A, 26 = Z, 27 = AA); values below 1 yield "". */
  toLetter(num) {
    let result = "";
    let remaining = num;
    while (remaining > 0) {
      remaining--;
      result = String.fromCharCode(65 + remaining % 26) + result;
      remaining = Math.floor(remaining / 26);
    }
    return result;
  }
  /**
   * Render a text node, making the target of a link mark visible and
   * appending the text of any footnote marks (parts are joined by a space).
   *
   * Link handling when both text and href exist and differ:
   * - text already contains the full href: text is kept unchanged;
   * - text contains the href without its http(s):// prefix: the first such
   *   occurrence is replaced by the full href;
   * - otherwise the href is appended after the text.
   *
   * @param node - A text node with optional `text` and `marks`.
   * @returns The plain text for the node.
   */
  textWithMarks(node) {
    const { text, marks } = node;
    const parts = [];
    const linkMark = marks?.find((m) => m.type === "link");
    const href = linkMark?.attrs?.href;
    if (text && href && text !== href) {
      const hrefBare = href.replace(/^https?:\/\//, "");
      if (text.includes(href)) {
        // Must be tested before the bare form: the bare href is a substring of
        // the full href, so replacing it here would duplicate the protocol.
        parts.push(text);
      } else if (text.includes(hrefBare)) {
        // Function replacer: the href is inserted literally, so "$&", "$1" etc.
        // inside a URL are not interpreted as replacement patterns.
        parts.push(text.replace(hrefBare, () => href));
      } else {
        parts.push(`${text} ${href}`);
      }
    } else if (text) {
      parts.push(text);
    } else if (href) {
      parts.push(href);
    }
    for (const mark of marks ?? []) {
      if (mark.type !== "footnote" || !mark.attrs?.content) continue;
      const footnoteText = mark.attrs.content.map((c) => this.textNodeToPlainText(c)).join("");
      if (footnoteText) parts.push(footnoteText);
    }
    return parts.join(" ");
  }
};
40024
+
39746
40025
  // src/info/ConfigBuilder.ts
39747
40026
  import path3 from "path";
39748
40027
  import { Enum as Enum22 } from "@ncoderz/superenum";
@@ -40452,7 +40731,25 @@ var Output = {
40452
40731
  /**
40453
40732
  * Output AST as a plain JS object, or a file
40454
40733
  */
40455
- ast: "ast"
40734
+ ast: "ast",
40735
+ /**
40736
+ * Output plain text as a string, or a file
40737
+ */
40738
+ text: "text"
40739
+ };
40740
/**
 * Input formats a caller can declare when extracting plain text, instead of
 * relying on automatic detection.
 */
var InputFormat = {
  /** The input is bitmark. */
  bitmark: "bitmark",
  /** The input is bitmarkText. */
  bitmarkText: "bitmarkText",
  /** The input is plain text. */
  plainText: "plainText"
};
40457
40754
  var BitmarkParserGenerator = class {
40458
40755
  constructor() {
@@ -40554,6 +40851,12 @@ var BitmarkParserGenerator = class {
40554
40851
  * - input(JSON/AST) ==> output(bitmark)
40555
40852
  * - input(bitmark) ==> output(JSON)
40556
40853
  *
40854
+ * Output type can be overridden to one of the following:
40855
+ * - bitmark: output bitmark string
40856
+ * - json: output JSON as a plain JS object, or a file
40857
+ * - ast: output AST as a plain JS object, or a file
40858
+ * - text: output plain text as a string, or a file
40859
+ *
40557
40860
  * By default, the result is returned as a string for bitmark or plain text, or a plain JS object for JSON/AST.
40558
40861
  *
40559
40862
  * The options can be used to write the output to a file and to set conversion options or override defaults.
@@ -40576,6 +40879,7 @@ var BitmarkParserGenerator = class {
40576
40879
  const outputBitmark = outputFormat === Output.bitmark;
40577
40880
  const outputJson = outputFormat === Output.json;
40578
40881
  const outputAst = outputFormat === Output.ast;
40882
+ const outputText = outputFormat === Output.text;
40579
40883
  const bitmarkParserType = BitmarkParserType.peggy;
40580
40884
  let inStr = input;
40581
40885
  const inputIsString = typeof input === "string";
@@ -40622,6 +40926,22 @@ var BitmarkParserGenerator = class {
40622
40926
  }
40623
40927
  }
40624
40928
  };
40929
+ const bitmarkToText = (bitmarkStr) => {
40930
+ ast = this.bitmarkParser.toAst(bitmarkStr, {
40931
+ parserType: bitmarkParserType
40932
+ });
40933
+ const jsonGenerator = new JsonObjectGenerator(opts);
40934
+ const json = jsonGenerator.generateSync(ast);
40935
+ const textGenerator = new PlainTextGenerator();
40936
+ const str = textGenerator.generate(json);
40937
+ if (opts.outputFile) {
40938
+ fs4.writeFileSync(opts.outputFile, str, {
40939
+ encoding: "utf8"
40940
+ });
40941
+ } else {
40942
+ res = str;
40943
+ }
40944
+ };
40625
40945
  const astToBitmark = (astJson) => {
40626
40946
  if (opts.outputFile) {
40627
40947
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40644,6 +40964,19 @@ var BitmarkParserGenerator = class {
40644
40964
  res = this.jsonStringifyPrettify(json, jsonOptions);
40645
40965
  }
40646
40966
  };
40967
+ const astToText = (astJson) => {
40968
+ const jsonGenerator = new JsonObjectGenerator(opts);
40969
+ const json = jsonGenerator.generateSync(astJson);
40970
+ const textGenerator = new PlainTextGenerator();
40971
+ const str = textGenerator.generate(json);
40972
+ if (opts.outputFile) {
40973
+ fs4.writeFileSync(opts.outputFile, str, {
40974
+ encoding: "utf8"
40975
+ });
40976
+ } else {
40977
+ res = str;
40978
+ }
40979
+ };
40647
40980
  const jsonToBitmark = (astJson) => {
40648
40981
  if (opts.outputFile) {
40649
40982
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40656,6 +40989,19 @@ var BitmarkParserGenerator = class {
40656
40989
  const jsonToAst = (astJson) => {
40657
40990
  res = this.jsonStringifyPrettify(astJson, jsonOptions);
40658
40991
  };
40992
+ const jsonToText = (astJson) => {
40993
+ const jsonGenerator = new JsonObjectGenerator(opts);
40994
+ const json = jsonGenerator.generateSync(astJson);
40995
+ const textGenerator = new PlainTextGenerator();
40996
+ const str = textGenerator.generate(json);
40997
+ if (opts.outputFile) {
40998
+ fs4.writeFileSync(opts.outputFile, str, {
40999
+ encoding: "utf8"
41000
+ });
41001
+ } else {
41002
+ res = str;
41003
+ }
41004
+ };
40659
41005
  const jsonToJson = (astJson) => {
40660
41006
  astToJson(astJson);
40661
41007
  };
@@ -40664,6 +41010,8 @@ var BitmarkParserGenerator = class {
40664
41010
  bitmarkToBitmark(inStr);
40665
41011
  } else if (outputAst) {
40666
41012
  bitmarkToAst(inStr);
41013
+ } else if (outputText) {
41014
+ bitmarkToText(inStr);
40667
41015
  } else {
40668
41016
  bitmarkToJson(inStr);
40669
41017
  }
@@ -40673,6 +41021,8 @@ var BitmarkParserGenerator = class {
40673
41021
  astToAst(ast);
40674
41022
  } else if (outputJson) {
40675
41023
  astToJson(ast);
41024
+ } else if (outputText) {
41025
+ astToText(ast);
40676
41026
  } else {
40677
41027
  astToBitmark(ast);
40678
41028
  }
@@ -40682,6 +41032,8 @@ var BitmarkParserGenerator = class {
40682
41032
  jsonToJson(ast);
40683
41033
  } else if (outputAst) {
40684
41034
  jsonToAst(ast);
41035
+ } else if (outputText) {
41036
+ jsonToText(ast);
40685
41037
  } else {
40686
41038
  jsonToBitmark(ast);
40687
41039
  }
@@ -40810,7 +41162,7 @@ var BitmarkParserGenerator = class {
40810
41162
  return res;
40811
41163
  }
40812
41164
  /**
40813
- * Convert bitmark text from JSON, or JSON to bitmark text.
41165
+ * Convert bitmark text to JSON, or JSON to bitmark text.
40814
41166
  *
40815
41167
  * Input type is detected automatically and may be:
40816
41168
  * - string: bitmark text or JSON
@@ -40884,6 +41236,54 @@ var BitmarkParserGenerator = class {
40884
41236
  }
40885
41237
  return res;
40886
41238
  }
41239
+ extractPlainText(input, options) {
41240
+ const dataIn = input;
41241
+ const inputFormat = options?.inputFormat;
41242
+ const isString2 = typeof input === "string";
41243
+ let data;
41244
+ const parseAutomatically = () => {
41245
+ let dataOut = dataIn;
41246
+ if (typeof dataIn === "string") {
41247
+ try {
41248
+ dataOut = JSON.parse(dataIn);
41249
+ } catch (_e) {
41250
+ let isBitmark = false;
41251
+ const bitmarkData = this.convert(dataIn, {
41252
+ outputFormat: Output.json
41253
+ });
41254
+ if (bitmarkData.length > 0) {
41255
+ const isError = bitmarkData[0].bit.type === BitType._error;
41256
+ if (!isError) {
41257
+ isBitmark = true;
41258
+ dataOut = bitmarkData;
41259
+ }
41260
+ }
41261
+ if (!isBitmark) {
41262
+ dataOut = this.convertText(dataIn, {
41263
+ textFormat: TextFormat.bitmarkText
41264
+ });
41265
+ }
41266
+ }
41267
+ }
41268
+ return dataOut;
41269
+ };
41270
+ if (inputFormat === InputFormat.bitmark) {
41271
+ data = this.convert(dataIn, {
41272
+ outputFormat: Output.json
41273
+ });
41274
+ } else if (inputFormat === InputFormat.bitmarkText) {
41275
+ data = this.convertText(dataIn, {
41276
+ textFormat: TextFormat.bitmarkText
41277
+ });
41278
+ } else if (inputFormat === InputFormat.plainText) {
41279
+ if (isString2) data = String(input);
41280
+ } else {
41281
+ data = parseAutomatically();
41282
+ }
41283
+ const generator = new PlainTextGenerator();
41284
+ const res = generator.generate(data);
41285
+ return res;
41286
+ }
40887
41287
  /**
40888
41288
  * Breakscape bitmark text.
40889
41289
  *
@@ -40991,6 +41391,16 @@ var BitmarkParserGenerator = class {
40991
41391
  }
40992
41392
  return;
40993
41393
  }
41394
+ textAstToPlainText(textAst, _options) {
41395
+ const textGenerator = new TextGenerator(BitmarkVersion.v3, {
41396
+ //
41397
+ });
41398
+ const res = textGenerator.generateSync(textAst, TextFormat.bitmarkText, TextLocation.body, {
41399
+ noBreakscaping: true,
41400
+ noMarkup: true
41401
+ });
41402
+ return res;
41403
+ }
40994
41404
  /**
40995
41405
  * Get the supported bits as a formatted strings
40996
41406
  *
@@ -41060,6 +41470,7 @@ export {
41060
41470
  InfoFormat,
41061
41471
  InfoType,
41062
41472
  Input,
41473
+ InputFormat,
41063
41474
  JsonFileGenerator,
41064
41475
  JsonGenerator,
41065
41476
  JsonParser,