@gmb/bitmark-parser-generator 5.15.0 → 5.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -48,6 +48,7 @@ __export(index_exports, {
48
48
  InfoFormat: () => InfoFormat,
49
49
  InfoType: () => InfoType,
50
50
  Input: () => Input,
51
+ InputFormat: () => InputFormat,
51
52
  JsonFileGenerator: () => JsonFileGenerator,
52
53
  JsonGenerator: () => JsonGenerator,
53
54
  JsonParser: () => JsonParser,
@@ -10981,7 +10982,7 @@ var instance2 = new Config();
10981
10982
  // src/generated/package_info.ts
10982
10983
  var PACKAGE_INFO = {
10983
10984
  "name": "@gmb/bitmark-parser-generator",
10984
- "version": "5.15.0",
10985
+ "version": "5.16.0",
10985
10986
  "author": "Get More Brain Ltd",
10986
10987
  "license": "ISC",
10987
10988
  "description": "A bitmark parser and generator using Peggy.js"
@@ -39804,6 +39805,265 @@ var JsonFileGenerator = class {
39804
39805
  }
39805
39806
  };
39806
39807
 
39808
+ // src/generator/plainText/PlainTextGenerator.ts
39809
+ var TEXT_NODE_TYPES = new Set(Object.values(TextNodeType));
39810
var PlainTextGenerator = class {
  /**
   * Generate plain text from a string or JSON object.
   *
   * A string input is first tried as JSON. If it does not parse, or if it
   * parses to a bare scalar (number, boolean, null), it is treated as plain
   * text and returned unchanged. Otherwise the parsed value is walked and the
   * collected text is returned trimmed.
   *
   * @param input - A string (plain or JSON-encoded) or a parsed JSON value.
   * @returns The extracted plain text.
   */
  generate(input) {
    let data = input;
    if (typeof data === "string") {
      let parsed;
      try {
        parsed = JSON.parse(data);
      } catch (_e) {
        return data;
      }
      // Plain text such as "2024" or "true" is also valid JSON; walking the
      // resulting scalar would yield "" and silently drop the text.
      if (parsed === null || (typeof parsed !== "object" && typeof parsed !== "string")) {
        return data;
      }
      data = parsed;
    }
    return this.walk(data).trim();
  }
  // ---------------------------------------------------------------------------
  // Private helpers
  // ---------------------------------------------------------------------------
  /**
   * Recursively collect plain text from an arbitrary JSON value.
   *
   * Strings are returned as-is, other primitives yield "". Arrays that start
   * with a text node are rendered as a text AST; other arrays and plain objects
   * are walked element by element and non-empty results joined with newlines.
   * Only object-valued (including array) properties of a plain object are
   * visited; primitive property values are skipped.
   *
   * @param value - Any JSON value.
   * @returns The collected text (not trimmed).
   */
  walk(value) {
    if (value == null) return "";
    if (typeof value === "string") return value;
    if (typeof value !== "object") return "";
    if (Array.isArray(value)) {
      if (value.length === 0) return "";
      if (this.isTextAst(value)) {
        return this.textAstToPlainText(value);
      }
      return value.map((v) => this.walk(v)).filter(Boolean).join("\n");
    }
    const obj = value;
    if (this.isTextNode(obj)) {
      return this.textNodeToPlainText(obj);
    }
    if (this.isBitWrapper(obj)) {
      return this.walk(obj["bit"]);
    }
    const parts = [];
    for (const val of Object.values(obj)) {
      if (val == null || typeof val !== "object") continue;
      const text = this.walk(val);
      if (text) parts.push(text);
    }
    return parts.join("\n");
  }
  // -- Type guards -----------------------------------------------------------
  /** True when `obj.type` is a string contained in TEXT_NODE_TYPES. */
  isTextNode(obj) {
    return typeof obj["type"] === "string" && TEXT_NODE_TYPES.has(obj["type"]);
  }
  /** True when the first element of `arr` is a (non-array) text node object. */
  isTextAst(arr) {
    const first = arr[0];
    return typeof first === "object" && first !== null && !Array.isArray(first) && this.isTextNode(first);
  }
  /** True when `obj` has a `bit` property holding a non-null, non-array object. */
  isBitWrapper(obj) {
    return "bit" in obj && typeof obj["bit"] === "object" && obj["bit"] !== null && !Array.isArray(obj["bit"]);
  }
  // -- TextNode extraction ---------------------------------------------------
  /** Render every top-level node of a text AST and join the results with newlines. */
  textAstToPlainText(ast) {
    return ast.map((node) => this.textNodeToPlainText(node)).join("\n");
  }
  /**
   * Render the children of `node` back-to-back, or fall back to its own `text`
   * (or "") when it has no `content`.
   */
  childrenOrText(node) {
    const { text, content } = node;
    return content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
  }
  /**
   * Render a single text node as plain text.
   *
   * @param node - A text node with `type` and optional `text`, `content`, `attrs`, `marks`.
   * @returns The plain text for the node and its descendants.
   */
  textNodeToPlainText(node) {
    switch (node.type) {
      case TextNodeType.text:
        return this.textWithMarks(node);
      case TextNodeType.hardBreak:
        return "\n";
      // Task item – normally rendered by taskListToPlainText; this is the
      // fallback when one is encountered standalone
      case TextNodeType.taskItem: {
        const checked = node.attrs?.checked ?? false;
        const prefix = checked ? "[x] " : "[ ] ";
        return `${prefix}${this.childrenOrText(node)}`;
      }
      // List containers – rendered with indent-aware helper
      case TextNodeType.noBulletList:
      case TextNodeType.bulletList:
      case TextNodeType.orderedList:
      case TextNodeType.orderedListRoman:
      case TextNodeType.orderedListRomanLower:
      case TextNodeType.letteredList:
      case TextNodeType.letteredListLower:
        return this.listToPlainText(node, 0);
      // Task list – rendered with indent-aware helper
      case TextNodeType.taskList:
        return this.taskListToPlainText(node, 0);
      // Images – return alt text when available
      case TextNodeType.image:
      case TextNodeType.imageInline:
        return node.attrs?.alt ?? "";
      // LaTeX – return the formula source
      case TextNodeType.latex:
        return node.attrs?.formula ?? "";
      // Paragraph, heading, section, gap, select, highlight, mark, codeBlock,
      // listItem and any other type: children joined without extra separator
      default:
        return this.childrenOrText(node);
    }
  }
  /**
   * Render a list container, one line per item, each line prefixed with
   * `depth` spaces and the marker for the list type.
   *
   * @param node - The list node.
   * @param depth - Nesting depth; 0 for a top-level list.
   * @returns The rendered list, or "" when the list has no content.
   */
  listToPlainText(node, depth) {
    const { type, content } = node;
    if (!content || content.length === 0) return "";
    const indent = " ".repeat(depth);
    const start = node.attrs?.start ?? 1;
    // NOTE(review): a start below 1 is shifted up by one – presumably to map a
    // zero-based start onto 1; confirm against the producer of `attrs.start`.
    const displayStart = start < 1 ? start + 1 : start;
    return content.map((child, i) => {
      const { inline, nested } = this.splitListItemContent(child, depth);
      const prefix = this.listItemPrefix(type, displayStart + i);
      const line = `${indent}${prefix}${inline}`;
      return nested ? `${line}\n${nested}` : line;
    }).join("\n");
  }
  /**
   * Render a task list, one line per item, each prefixed with "[x] " or "[ ] "
   * according to the item's `attrs.checked`.
   *
   * @param node - The task list node.
   * @param depth - Nesting depth; 0 for a top-level list.
   * @returns The rendered list, or "" when the list has no content.
   */
  taskListToPlainText(node, depth) {
    const { content } = node;
    if (!content || content.length === 0) return "";
    const indent = " ".repeat(depth);
    return content.map((child) => {
      const checked = child.attrs?.checked ?? false;
      const prefix = checked ? "[x] " : "[ ] ";
      const { inline, nested } = this.splitListItemContent(child, depth);
      const line = `${indent}${prefix}${inline}`;
      return nested ? `${line}\n${nested}` : line;
    }).join("\n");
  }
  /**
   * Split a list item's children into inline text and nested lists.
   *
   * @param item - The list item node.
   * @param depth - Depth of the list that owns the item; nested lists render at depth + 1.
   * @returns `inline` (non-list children concatenated) and `nested` (nested lists joined by newlines).
   */
  splitListItemContent(item, depth) {
    const children = item.content ?? [];
    const inlineParts = [];
    const nestedParts = [];
    for (const child of children) {
      if (this.isListType(child.type)) {
        nestedParts.push(this.renderNestedList(child, depth + 1));
      } else {
        inlineParts.push(this.textNodeToPlainText(child));
      }
    }
    return {
      inline: inlineParts.join(""),
      nested: nestedParts.join("\n")
    };
  }
  /** True when `type` is one of the list container types (including task lists). */
  isListType(type) {
    return type === TextNodeType.bulletList || type === TextNodeType.orderedList || type === TextNodeType.orderedListRoman || type === TextNodeType.orderedListRomanLower || type === TextNodeType.letteredList || type === TextNodeType.letteredListLower || type === TextNodeType.noBulletList || type === TextNodeType.taskList;
  }
  /** Render a nested list with the task-list or regular list helper, as appropriate. */
  renderNestedList(node, depth) {
    if (node.type === TextNodeType.taskList) {
      return this.taskListToPlainText(node, depth);
    }
    return this.listToPlainText(node, depth);
  }
  /**
   * Build the marker placed before a list item.
   *
   * @param listType - The type of the owning list.
   * @param index - The display number of the item.
   * @returns The marker including its trailing space, or "" for unmarked lists.
   */
  listItemPrefix(listType, index) {
    switch (listType) {
      case TextNodeType.bulletList:
        return "\u2022 ";
      case TextNodeType.orderedList:
        return `${index}. `;
      case TextNodeType.orderedListRoman:
        return `${this.toRoman(index)}. `;
      case TextNodeType.orderedListRomanLower:
        return `${this.toRoman(index).toLowerCase()}. `;
      case TextNodeType.letteredList:
        return `${this.toLetter(index)}. `;
      case TextNodeType.letteredListLower:
        return `${this.toLetter(index).toLowerCase()}. `;
      case TextNodeType.noBulletList:
      default:
        return "";
    }
  }
  /**
   * Convert a positive integer to upper-case Roman numerals.
   * Values below 1 produce "".
   */
  toRoman(num) {
    const romanNumerals = [
      [1e3, "M"],
      [900, "CM"],
      [500, "D"],
      [400, "CD"],
      [100, "C"],
      [90, "XC"],
      [50, "L"],
      [40, "XL"],
      [10, "X"],
      [9, "IX"],
      [5, "V"],
      [4, "IV"],
      [1, "I"]
    ];
    let result = "";
    let remaining = num;
    for (const [value, numeral] of romanNumerals) {
      while (remaining >= value) {
        result += numeral;
        remaining -= value;
      }
    }
    return result;
  }
  /**
   * Convert a positive integer to upper-case letters in bijective base 26
   * (1 → "A", 26 → "Z", 27 → "AA"). Values below 1 produce "".
   */
  toLetter(num) {
    let result = "";
    let remaining = num;
    while (remaining > 0) {
      remaining--;
      result = String.fromCharCode(65 + remaining % 26) + result;
      remaining = Math.floor(remaining / 26);
    }
    return result;
  }
  /**
   * Render a `text` node, taking its link and footnote marks into account.
   *
   * For a link mark whose href differs from the text:
   *  - text that already contains the full href is kept unchanged;
   *  - text that contains the href without its http(s) scheme has the first
   *    such occurrence replaced by the full href;
   *  - otherwise the href is appended after the text.
   * The content of every footnote mark is appended afterwards. All parts are
   * joined with a single space.
   *
   * @param node - A text node with optional `text` and `marks`.
   * @returns The rendered text.
   */
  textWithMarks(node) {
    const { text, marks } = node;
    const parts = [];
    const linkMark = marks?.find((m) => m.type === "link");
    const href = linkMark?.attrs?.href;
    if (text && href && text !== href) {
      const hrefBare = href.replace(/^https?:\/\//, "");
      // The full href must be tested first: it always contains hrefBare, so
      // testing hrefBare first would rewrite "https://x" into "https://https://x".
      if (text.includes(href)) {
        parts.push(text);
      } else if (text.includes(hrefBare)) {
        // Function replacer: the href is inserted literally, so "$&", "$1" etc.
        // inside a URL are not interpreted as replacement patterns.
        parts.push(text.replace(hrefBare, () => href));
      } else {
        parts.push(`${text} ${href}`);
      }
    } else if (text) {
      parts.push(text);
    } else if (href) {
      parts.push(href);
    }
    if (marks) {
      for (const mark of marks) {
        if (mark.type === "footnote") {
          const footnote = mark;
          if (footnote.attrs?.content) {
            const footnoteText = footnote.attrs.content.map((c) => this.textNodeToPlainText(c)).join("");
            if (footnoteText) parts.push(footnoteText);
          }
        }
      }
    }
    return parts.join(" ");
  }
};
40066
+
39807
40067
  // src/info/ConfigBuilder.ts
39808
40068
  var import_node_path3 = __toESM(require("path"), 1);
39809
40069
  var import_superenum50 = require("@ncoderz/superenum");
@@ -40513,7 +40773,25 @@ var Output = {
40513
40773
  /**
40514
40774
  * Output AST as a plain JS object, or a file
40515
40775
  */
40516
- ast: "ast"
40776
+ ast: "ast",
40777
+ /**
40778
+ * Output plain text as a string, or a file
40779
+ */
40780
+ text: "text"
40781
+ };
40782
// Input formats that can be named explicitly via the `inputFormat` option.
var InputFormat = {
  /** Input is bitmark */
  bitmark: "bitmark",
  /** Input is bitmarkText */
  bitmarkText: "bitmarkText",
  /** Input is plain text */
  plainText: "plainText"
};
40518
40796
  var BitmarkParserGenerator = class {
40519
40797
  constructor() {
@@ -40615,6 +40893,12 @@ var BitmarkParserGenerator = class {
40615
40893
  * - input(JSON/AST) ==> output(bitmark)
40616
40894
  * - input(bitmark) ==> output(JSON)
40617
40895
  *
40896
+ * Output type can be overridden to one of the following:
40897
+ * - bitmark: output bitmark string
40898
+ * - json: output JSON as a plain JS object, or a file
40899
+ * - ast: output AST as a plain JS object, or a file
40900
+ * - text: output plain text as a string, or a file
40901
+ *
40618
40902
  * By default, the result is returned as a string for bitmark, or a plain JS object for JSON/AST.
40619
40903
  *
40620
40904
  * The options can be used to write the output to a file and to set conversion options or override defaults.
@@ -40637,6 +40921,7 @@ var BitmarkParserGenerator = class {
40637
40921
  const outputBitmark = outputFormat === Output.bitmark;
40638
40922
  const outputJson = outputFormat === Output.json;
40639
40923
  const outputAst = outputFormat === Output.ast;
40924
+ const outputText = outputFormat === Output.text;
40640
40925
  const bitmarkParserType = BitmarkParserType.peggy;
40641
40926
  let inStr = input;
40642
40927
  const inputIsString = typeof input === "string";
@@ -40683,6 +40968,22 @@ var BitmarkParserGenerator = class {
40683
40968
  }
40684
40969
  }
40685
40970
  };
40971
const bitmarkToText = (bitmarkStr) => {
  // bitmark → AST (stored in the enclosing `ast`) → JSON object → plain text
  const parseOptions = { parserType: bitmarkParserType };
  ast = this.bitmarkParser.toAst(bitmarkStr, parseOptions);
  const asJson = new JsonObjectGenerator(opts).generateSync(ast);
  const plainText = new PlainTextGenerator().generate(asJson);
  if (!opts.outputFile) {
    // No output file requested: return the text through the enclosing `res`
    res = plainText;
    return;
  }
  import_fs_extra4.default.writeFileSync(opts.outputFile, plainText, { encoding: "utf8" });
};
40686
40987
  const astToBitmark = (astJson) => {
40687
40988
  if (opts.outputFile) {
40688
40989
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40705,6 +41006,19 @@ var BitmarkParserGenerator = class {
40705
41006
  res = this.jsonStringifyPrettify(json, jsonOptions);
40706
41007
  }
40707
41008
  };
41009
const astToText = (astJson) => {
  // AST → JSON object → plain text
  const asJson = new JsonObjectGenerator(opts).generateSync(astJson);
  const plainText = new PlainTextGenerator().generate(asJson);
  if (!opts.outputFile) {
    // No output file requested: return the text through the enclosing `res`
    res = plainText;
    return;
  }
  import_fs_extra4.default.writeFileSync(opts.outputFile, plainText, { encoding: "utf8" });
};
40708
41022
  const jsonToBitmark = (astJson) => {
40709
41023
  if (opts.outputFile) {
40710
41024
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40717,6 +41031,19 @@ var BitmarkParserGenerator = class {
40717
41031
  const jsonToAst = (astJson) => {
40718
41032
  res = this.jsonStringifyPrettify(astJson, jsonOptions);
40719
41033
  };
41034
const jsonToText = (astJson) => {
  // Regenerate a JSON object from the parsed input, then flatten it to plain text
  const asJson = new JsonObjectGenerator(opts).generateSync(astJson);
  const plainText = new PlainTextGenerator().generate(asJson);
  const target = opts.outputFile;
  if (target) {
    import_fs_extra4.default.writeFileSync(target, plainText, { encoding: "utf8" });
    return;
  }
  // No output file requested: return the text through the enclosing `res`
  res = plainText;
};
40720
41047
  const jsonToJson = (astJson) => {
40721
41048
  astToJson(astJson);
40722
41049
  };
@@ -40725,6 +41052,8 @@ var BitmarkParserGenerator = class {
40725
41052
  bitmarkToBitmark(inStr);
40726
41053
  } else if (outputAst) {
40727
41054
  bitmarkToAst(inStr);
41055
+ } else if (outputText) {
41056
+ bitmarkToText(inStr);
40728
41057
  } else {
40729
41058
  bitmarkToJson(inStr);
40730
41059
  }
@@ -40734,6 +41063,8 @@ var BitmarkParserGenerator = class {
40734
41063
  astToAst(ast);
40735
41064
  } else if (outputJson) {
40736
41065
  astToJson(ast);
41066
+ } else if (outputText) {
41067
+ astToText(ast);
40737
41068
  } else {
40738
41069
  astToBitmark(ast);
40739
41070
  }
@@ -40743,6 +41074,8 @@ var BitmarkParserGenerator = class {
40743
41074
  jsonToJson(ast);
40744
41075
  } else if (outputAst) {
40745
41076
  jsonToAst(ast);
41077
+ } else if (outputText) {
41078
+ jsonToText(ast);
40746
41079
  } else {
40747
41080
  jsonToBitmark(ast);
40748
41081
  }
@@ -40871,7 +41204,7 @@ var BitmarkParserGenerator = class {
40871
41204
  return res;
40872
41205
  }
40873
41206
  /**
40874
- * Convert bitmark text from JSON, or JSON to bitmark text.
41207
+ * Convert bitmark text to JSON, or JSON to bitmark text.
40875
41208
  *
40876
41209
  * Input type is detected automatically and may be:
40877
41210
  * - string: bitmark text or JSON
@@ -40945,6 +41278,54 @@ var BitmarkParserGenerator = class {
40945
41278
  }
40946
41279
  return res;
40947
41280
  }
41281
+ extractPlainText(input, options) {
41282
+ const dataIn = input;
41283
+ const inputFormat = options?.inputFormat;
41284
+ const isString2 = typeof input === "string";
41285
+ let data;
41286
+ const parseAutomatically = () => {
41287
+ let dataOut = dataIn;
41288
+ if (typeof dataIn === "string") {
41289
+ try {
41290
+ dataOut = JSON.parse(dataIn);
41291
+ } catch (_e) {
41292
+ let isBitmark = false;
41293
+ const bitmarkData = this.convert(dataIn, {
41294
+ outputFormat: Output.json
41295
+ });
41296
+ if (bitmarkData.length > 0) {
41297
+ const isError = bitmarkData[0].bit.type === BitType._error;
41298
+ if (!isError) {
41299
+ isBitmark = true;
41300
+ dataOut = bitmarkData;
41301
+ }
41302
+ }
41303
+ if (!isBitmark) {
41304
+ dataOut = this.convertText(dataIn, {
41305
+ textFormat: TextFormat.bitmarkText
41306
+ });
41307
+ }
41308
+ }
41309
+ }
41310
+ return dataOut;
41311
+ };
41312
+ if (inputFormat === InputFormat.bitmark) {
41313
+ data = this.convert(dataIn, {
41314
+ outputFormat: Output.json
41315
+ });
41316
+ } else if (inputFormat === InputFormat.bitmarkText) {
41317
+ data = this.convertText(dataIn, {
41318
+ textFormat: TextFormat.bitmarkText
41319
+ });
41320
+ } else if (inputFormat === InputFormat.plainText) {
41321
+ if (isString2) data = String(input);
41322
+ } else {
41323
+ data = parseAutomatically();
41324
+ }
41325
+ const generator = new PlainTextGenerator();
41326
+ const res = generator.generate(data);
41327
+ return res;
41328
+ }
40948
41329
  /**
40949
41330
  * Breakscape bitmark text.
40950
41331
  *
@@ -41052,6 +41433,16 @@ var BitmarkParserGenerator = class {
41052
41433
  }
41053
41434
  return;
41054
41435
  }
41436
+ textAstToPlainText(textAst, _options) {
41437
+ const textGenerator = new TextGenerator(BitmarkVersion.v3, {
41438
+ //
41439
+ });
41440
+ const res = textGenerator.generateSync(textAst, TextFormat.bitmarkText, TextLocation.body, {
41441
+ noBreakscaping: true,
41442
+ noMarkup: true
41443
+ });
41444
+ return res;
41445
+ }
41055
41446
  /**
41056
41447
  * Get the supported bits as a formatted strings
41057
41448
  *
@@ -41122,6 +41513,7 @@ init();
41122
41513
  InfoFormat,
41123
41514
  InfoType,
41124
41515
  Input,
41516
+ InputFormat,
41125
41517
  JsonFileGenerator,
41126
41518
  JsonGenerator,
41127
41519
  JsonParser,