react-native-pageindex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/CHANGELOG.md +25 -0
  2. package/LICENSE +21 -0
  3. package/README.md +405 -0
  4. package/dist/config.d.ts +4 -0
  5. package/dist/config.d.ts.map +1 -0
  6. package/dist/config.js +22 -0
  7. package/dist/config.js.map +1 -0
  8. package/dist/index.d.ts +49 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +75 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/pageIndex.d.ts +48 -0
  13. package/dist/pageIndex.d.ts.map +1 -0
  14. package/dist/pageIndex.js +962 -0
  15. package/dist/pageIndex.js.map +1 -0
  16. package/dist/pageIndexDocument.d.ts +85 -0
  17. package/dist/pageIndexDocument.d.ts.map +1 -0
  18. package/dist/pageIndexDocument.js +145 -0
  19. package/dist/pageIndexDocument.js.map +1 -0
  20. package/dist/pageIndexMd.d.ts +31 -0
  21. package/dist/pageIndexMd.d.ts.map +1 -0
  22. package/dist/pageIndexMd.js +260 -0
  23. package/dist/pageIndexMd.js.map +1 -0
  24. package/dist/parsers/csv.d.ts +17 -0
  25. package/dist/parsers/csv.d.ts.map +1 -0
  26. package/dist/parsers/csv.js +147 -0
  27. package/dist/parsers/csv.js.map +1 -0
  28. package/dist/parsers/docx.d.ts +20 -0
  29. package/dist/parsers/docx.d.ts.map +1 -0
  30. package/dist/parsers/docx.js +134 -0
  31. package/dist/parsers/docx.js.map +1 -0
  32. package/dist/parsers/xlsx.d.ts +19 -0
  33. package/dist/parsers/xlsx.d.ts.map +1 -0
  34. package/dist/parsers/xlsx.js +121 -0
  35. package/dist/parsers/xlsx.js.map +1 -0
  36. package/dist/reverseIndex.d.ts +39 -0
  37. package/dist/reverseIndex.d.ts.map +1 -0
  38. package/dist/reverseIndex.js +248 -0
  39. package/dist/reverseIndex.js.map +1 -0
  40. package/dist/types.d.ts +190 -0
  41. package/dist/types.d.ts.map +1 -0
  42. package/dist/types.js +4 -0
  43. package/dist/types.js.map +1 -0
  44. package/dist/utils/json.d.ts +13 -0
  45. package/dist/utils/json.d.ts.map +1 -0
  46. package/dist/utils/json.js +69 -0
  47. package/dist/utils/json.js.map +1 -0
  48. package/dist/utils/pdf.d.ts +20 -0
  49. package/dist/utils/pdf.d.ts.map +1 -0
  50. package/dist/utils/pdf.js +96 -0
  51. package/dist/utils/pdf.js.map +1 -0
  52. package/dist/utils/progress.d.ts +29 -0
  53. package/dist/utils/progress.d.ts.map +1 -0
  54. package/dist/utils/progress.js +59 -0
  55. package/dist/utils/progress.js.map +1 -0
  56. package/dist/utils/tokens.d.ts +7 -0
  57. package/dist/utils/tokens.d.ts.map +1 -0
  58. package/dist/utils/tokens.js +12 -0
  59. package/dist/utils/tokens.js.map +1 -0
  60. package/dist/utils/tree.d.ts +88 -0
  61. package/dist/utils/tree.d.ts.map +1 -0
  62. package/dist/utils/tree.js +365 -0
  63. package/dist/utils/tree.js.map +1 -0
  64. package/package.json +76 -0
@@ -0,0 +1,96 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.extractPdfPages = extractPdfPages;
37
+ const tokens_1 = require("./tokens");
38
+ /**
39
+ * Extracts per-page text from a PDF buffer using `pdfjs-dist`.
40
+ *
41
+ * This is an **optional helper** — install `pdfjs-dist` (>=4.0.0) to use it.
42
+ * If you already have page text (e.g., from `react-native-pdf` or a backend),
43
+ * you can pass `PageData[]` directly to `pageIndex()` without calling this.
44
+ *
45
+ * @param data Raw PDF bytes (ArrayBuffer or Uint8Array)
46
+ * @param counter Token counter function (defaults to ~4 chars/token)
47
+ * @returns Array of `{ text, tokenCount }` — one entry per page
48
+ *
49
+ * @example
50
+ * import RNFS from 'react-native-fs';
51
+ * const base64 = await RNFS.readFile(filePath, 'base64');
52
+ * const bytes = Uint8Array.from(atob(base64), c => c.charCodeAt(0));
53
+ * const pages = await extractPdfPages(bytes.buffer as ArrayBuffer);
54
+ */
55
+ async function extractPdfPages(data, counter = tokens_1.defaultTokenCounter) {
56
+ // Dynamically import pdfjs-dist so projects that don't need PDF parsing
57
+ // don't get a hard dependency / bundler error.
58
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
59
+ let pdfjsLib;
60
+ try {
61
+ // Try the legacy build first (better compatibility with non-browser envs)
62
+ pdfjsLib = await Promise.resolve(`${'pdfjs-dist/legacy/build/pdf'}`).then(s => __importStar(require(s)));
63
+ }
64
+ catch {
65
+ try {
66
+ pdfjsLib = await Promise.resolve(`${'pdfjs-dist'}`).then(s => __importStar(require(s)));
67
+ }
68
+ catch {
69
+ throw new Error('[PageIndex] pdfjs-dist is not installed. ' +
70
+ 'Run: npm install pdfjs-dist (or yarn add pdfjs-dist)');
71
+ }
72
+ }
73
+ // Normalise to ArrayBuffer (Uint8Array.buffer can be SharedArrayBuffer in some envs)
74
+ const buffer = data instanceof Uint8Array
75
+ ? data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength)
76
+ : data;
77
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access
78
+ const loadingTask = pdfjsLib.getDocument({ data: buffer });
79
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access
80
+ const doc = await loadingTask.promise;
81
+ const pages = [];
82
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
83
+ for (let i = 1; i <= doc.numPages; i++) {
84
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-assignment
85
+ const page = await doc.getPage(i);
86
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-call, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-assignment
87
+ const content = await page.getTextContent();
88
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
89
+ const text = content.items
90
+ .map((item) => (typeof item['str'] === 'string' ? item['str'] : ''))
91
+ .join(' ');
92
+ pages.push({ text, tokenCount: counter(text) });
93
+ }
94
+ return pages;
95
+ }
96
+ //# sourceMappingURL=pdf.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/utils/pdf.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoBA,0CAgDC;AAnED,qCAA+C;AAE/C;;;;;;;;;;;;;;;;GAgBG;AACI,KAAK,UAAU,eAAe,CACnC,IAA8B,EAC9B,UAAwB,4BAAmB;IAE3C,wEAAwE;IACxE,+CAA+C;IAC/C,8DAA8D;IAC9D,IAAI,QAAa,CAAC;IAClB,IAAI,CAAC;QACH,0EAA0E;QAC1E,QAAQ,GAAG,yBAAuC,6BAAuC,uCAAC,CAAC;IAC7F,CAAC;IAAC,MAAM,CAAC;QACP,IAAI,CAAC;YACH,QAAQ,GAAG,yBAAuC,YAAsB,uCAAC,CAAC;QAC5E,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,2CAA2C;gBACzC,uDAAuD,CAC1D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,qFAAqF;IACrF,MAAM,MAAM,GACV,IAAI,YAAY,UAAU;QACxB,CAAC,CAAE,IAAI,CAAC,MAAsB,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC;QACxF,CAAC,CAAE,IAAoB,CAAC;IAE5B,yGAAyG;IACzG,MAAM,WAAW,GAAG,QAAQ,CAAC,WAAW,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;IAC3D,+GAA+G;IAC/G,MAAM,GAAG,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC;IAEtC,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,sEAAsE;IACtE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,kJAAkJ;QAClJ,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAClC,kJAAkJ;QAClJ,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;QAC5C,yGAAyG;QACzG,MAAM,IAAI,GAAY,OAAO,CAAC,KAAwC;aACnE,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;aACnE,IAAI,CAAC,GAAG,CAAC,CAAC;QACb,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAClD,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC"}
@@ -0,0 +1,29 @@
1
+ import type { ProgressCallback } from '../types';
2
+ /**
3
+ * Manages progress reporting across an ordered list of named steps.
4
+ * `report()` looks a step up by name; `advance()` and `reportAt()` emit without
5
+ * a lookup. All three fire the user-supplied `onProgress` callback.
6
+ */
7
+ export declare class ProgressReporter {
8
+ private readonly cb;
9
+ private readonly steps;
10
+ private currentIndex;
11
+ constructor(steps: readonly string[], onProgress?: ProgressCallback);
12
+ /**
13
+ * Report progress for a specific named step.
14
+ * Looks the step up in the list to compute the percentage; unknown names keep the current position.
15
+ */
16
+ report(step: string, detail?: string): void;
17
+ /**
18
+ * Report a sub-step that is not in the list at the current step's percentage (the step index is not moved).
19
+ */
20
+ advance(step: string, detail?: string): void;
21
+ /**
22
+ * Report an explicit percentage, rounded and clamped to 0-100 (useful for per-page loops).
23
+ */
24
+ reportAt(percent: number, step: string, detail?: string): void;
25
+ private emit;
26
+ }
27
+ /** No-op reporter used when no callback is supplied. */
28
+ export declare const NULL_REPORTER: ProgressReporter;
29
+ //# sourceMappingURL=progress.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"progress.d.ts","sourceRoot":"","sources":["../../src/utils/progress.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAgB,MAAM,UAAU,CAAC;AAE/D;;;;GAIG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,QAAQ,CAAC,EAAE,CAA+B;IAClD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAoB;IAC1C,OAAO,CAAC,YAAY,CAAK;gBAEb,KAAK,EAAE,SAAS,MAAM,EAAE,EAAE,UAAU,CAAC,EAAE,gBAAgB;IAKnE;;;OAGG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI;IAQ3C;;OAEG;IACH,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI;IAM5C;;OAEG;IACH,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI;IAK9D,OAAO,CAAC,IAAI;CAOb;AAED,wDAAwD;AACxD,eAAO,MAAM,aAAa,kBAA2B,CAAC"}
@@ -0,0 +1,59 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.NULL_REPORTER = exports.ProgressReporter = void 0;
4
+ /**
5
+ * Manages progress reporting across an ordered list of named steps.
6
+ * Each call to `report()` (or `advance()`) moves to the next step and
7
+ * fires the user-supplied `onProgress` callback.
8
+ */
9
+ class ProgressReporter {
10
+ cb;
11
+ steps;
12
+ currentIndex = 0;
13
+ constructor(steps, onProgress) {
14
+ this.steps = steps;
15
+ this.cb = onProgress;
16
+ }
17
+ /**
18
+ * Report progress for a specific named step.
19
+ * Looks the step up in the list to compute the correct percentage.
20
+ */
21
+ report(step, detail) {
22
+ if (!this.cb)
23
+ return;
24
+ const idx = this.steps.indexOf(step);
25
+ if (idx !== -1)
26
+ this.currentIndex = idx;
27
+ const percent = Math.round((this.currentIndex / Math.max(this.steps.length - 1, 1)) * 100);
28
+ this.emit({ step, percent, detail });
29
+ }
30
+ /**
31
+ * Advance to the next step automatically (for sub-steps not in the list).
32
+ */
33
+ advance(step, detail) {
34
+ if (!this.cb)
35
+ return;
36
+ const percent = Math.round((this.currentIndex / Math.max(this.steps.length - 1, 1)) * 100);
37
+ this.emit({ step, percent, detail });
38
+ }
39
+ /**
40
+ * Report an exact percentage override (useful for per-page loops).
41
+ */
42
+ reportAt(percent, step, detail) {
43
+ if (!this.cb)
44
+ return;
45
+ this.emit({ step, percent: Math.min(100, Math.max(0, Math.round(percent))), detail });
46
+ }
47
+ emit(info) {
48
+ try {
49
+ this.cb?.(info);
50
+ }
51
+ catch {
52
+ // Never let a user callback crash the pipeline
53
+ }
54
+ }
55
+ }
56
+ exports.ProgressReporter = ProgressReporter;
57
+ /** No-op reporter used when no callback is supplied. */
58
+ exports.NULL_REPORTER = new ProgressReporter([]);
59
+ //# sourceMappingURL=progress.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"progress.js","sourceRoot":"","sources":["../../src/utils/progress.ts"],"names":[],"mappings":";;;AAEA;;;;GAIG;AACH,MAAa,gBAAgB;IACV,EAAE,CAA+B;IACjC,KAAK,CAAoB;IAClC,YAAY,GAAG,CAAC,CAAC;IAEzB,YAAY,KAAwB,EAAE,UAA6B;QACjE,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,EAAE,GAAG,UAAU,CAAC;IACvB,CAAC;IAED;;;OAGG;IACH,MAAM,CAAC,IAAY,EAAE,MAAe;QAClC,IAAI,CAAC,IAAI,CAAC,EAAE;YAAE,OAAO;QACrB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACrC,IAAI,GAAG,KAAK,CAAC,CAAC;YAAE,IAAI,CAAC,YAAY,GAAG,GAAG,CAAC;QACxC,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;QAC3F,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,OAAO,CAAC,IAAY,EAAE,MAAe;QACnC,IAAI,CAAC,IAAI,CAAC,EAAE;YAAE,OAAO;QACrB,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;QAC3F,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,OAAe,EAAE,IAAY,EAAE,MAAe;QACrD,IAAI,CAAC,IAAI,CAAC,EAAE;YAAE,OAAO;QACrB,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACxF,CAAC;IAEO,IAAI,CAAC,IAAkB;QAC7B,IAAI,CAAC;YACH,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;QAClB,CAAC;QAAC,MAAM,CAAC;YACP,+CAA+C;QACjD,CAAC;IACH,CAAC;CACF;AA9CD,4CA8CC;AAED,wDAAwD;AAC3C,QAAA,aAAa,GAAG,IAAI,gBAAgB,CAAC,EAAE,CAAC,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { TokenCounter } from '../types';
2
+ /**
3
+ * Default token counter: ~4 characters per token (GPT-4 average for English), rounded up.
4
+ * For accurate counts, pass a custom `tokenCounter` using `js-tiktoken`.
5
+ */
6
+ export declare const defaultTokenCounter: TokenCounter;
7
+ //# sourceMappingURL=tokens.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokens.d.ts","sourceRoot":"","sources":["../../src/utils/tokens.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAE7C;;;GAGG;AACH,eAAO,MAAM,mBAAmB,EAAE,YAEjC,CAAC"}
@@ -0,0 +1,12 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.defaultTokenCounter = void 0;
4
+ /**
5
+ * Default token counter: ceil(length / 4), i.e. ~4 characters per token (GPT-4 average for English).
6
+ * `null`/`undefined` text counts as 0. For accurate counts, pass a custom `tokenCounter` using `js-tiktoken`.
7
+ */
8
+ const defaultTokenCounter = (text) => {
9
+ return Math.ceil((text ?? '').length / 4);
10
+ };
11
+ exports.defaultTokenCounter = defaultTokenCounter;
12
+ //# sourceMappingURL=tokens.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokens.js","sourceRoot":"","sources":["../../src/utils/tokens.ts"],"names":[],"mappings":";;;AAEA;;;GAGG;AACI,MAAM,mBAAmB,GAAiB,CAAC,IAAY,EAAU,EAAE;IACxE,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAC5C,CAAC,CAAC;AAFW,QAAA,mBAAmB,uBAE9B"}
@@ -0,0 +1,88 @@
1
+ import type { TreeNode, PageData } from '../types';
2
+ /**
3
+ * Recursively writes sequential zero-padded node_id values to every node.
4
+ * Port of `write_node_id()` from utils.py
5
+ */
6
+ export declare function writeNodeId(data: TreeNode | TreeNode[], nodeId?: number): number;
7
+ /**
8
+ * Returns all nodes as a flat list (each node without its `nodes` children).
9
+ * Port of `get_nodes()` from utils.py
10
+ */
11
+ export declare function getNodes(structure: TreeNode | TreeNode[]): TreeNode[];
12
+ /**
13
+ * Flattens the tree into a list where each element still contains its `nodes`.
14
+ * Port of `structure_to_list()` from utils.py
15
+ */
16
+ export declare function structureToList(structure: TreeNode | TreeNode[]): TreeNode[];
17
+ /**
18
+ * Returns only leaf nodes (nodes with no children).
19
+ * Port of `get_leaf_nodes()` from utils.py
20
+ */
21
+ export declare function getLeafNodes(structure: TreeNode | TreeNode[]): TreeNode[];
22
+ /**
23
+ * Converts a flat list with `structure` index codes (e.g. "1.2.3") into a
24
+ * nested tree. Port of `list_to_tree()` from utils.py
25
+ */
26
+ export declare function listToTree(data: TreeNode[]): TreeNode[];
27
+ /**
28
+ * Converts a flat TOC list (with `physical_index`) into a tree, assigning
29
+ * `start_index` and `end_index` to each node.
30
+ * Port of `post_processing()` from utils.py
31
+ */
32
+ export declare function postProcessing(structure: TreeNode[], endPhysicalIndex: number): TreeNode[];
33
+ /**
34
+ * Inserts a "Preface" node at the beginning if the first section starts
35
+ * after page 1. Port of `add_preface_if_needed()` from utils.py
36
+ */
37
+ export declare function addPrefaceIfNeeded(data: TreeNode[]): TreeNode[];
38
+ /**
39
+ * Attaches raw page text to each node based on its start/end indices.
40
+ * Port of `add_node_text()` from utils.py
41
+ */
42
+ export declare function addNodeText(node: TreeNode | TreeNode[], pages: PageData[]): void;
43
+ /**
44
+ * Same as `addNodeText` but wraps text in `<physical_index_X>` tags.
45
+ * Port of `add_node_text_with_labels()` from utils.py
46
+ */
47
+ export declare function addNodeTextWithLabels(node: TreeNode | TreeNode[], pages: PageData[]): void;
48
+ /**
49
+ * Recursively removes specified fields from all nodes.
50
+ * Port of `remove_fields()` from utils.py
51
+ */
52
+ export declare function removeFields(data: unknown, fields?: string[]): unknown;
53
+ /**
54
+ * Removes the `text` field from all nodes in-place.
55
+ * Port of `remove_structure_text()` from utils.py
56
+ */
57
+ export declare function removeStructureText(data: TreeNode | TreeNode[]): TreeNode | TreeNode[];
58
+ type KeyOrder = string[];
59
+ /**
60
+ * Re-orders keys of each node and optionally removes empty `nodes` arrays.
61
+ * Port of `format_structure()` from utils.py
62
+ */
63
+ export declare function formatStructure(structure: TreeNode | TreeNode[], order?: KeyOrder): TreeNode | TreeNode[];
64
+ /**
65
+ * Creates a minimal structure (only title, node_id, summary, prefix_summary)
66
+ * suitable for document description generation.
67
+ * Port of `create_clean_structure_for_description()` from utils.py
68
+ */
69
+ export declare function createCleanStructureForDescription(structure: TreeNode | TreeNode[]): TreeNode | TreeNode[];
70
+ /**
71
+ * Sets `physical_index` to null for any TOC item that references a page
72
+ * beyond the actual document length.
73
+ * Port of `validate_and_truncate_physical_indices()` from utils.py
74
+ */
75
+ export declare function validateAndTruncatePhysicalIndices(tocItems: TreeNode[], pageListLength: number, startIndex?: number): TreeNode[];
76
+ /**
77
+ * Converts string-form `<physical_index_X>` values to integers in-place.
78
+ * Port of `convert_physical_index_to_int()` from utils.py
79
+ */
80
+ export declare function convertPhysicalIndexToInt(data: TreeNode[] | string): TreeNode[] | number | null;
81
+ /**
82
+ * Converts string `page` values to numbers in-place.
83
+ * Port of `convert_page_to_int()` from utils.py
84
+ */
85
+ export declare function convertPageToInt(data: TreeNode[]): TreeNode[];
86
+ export declare function deepClone<T>(value: T): T;
87
+ export {};
88
+ //# sourceMappingURL=tree.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tree.d.ts","sourceRoot":"","sources":["../../src/utils/tree.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAInD;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,QAAQ,GAAG,QAAQ,EAAE,EAAE,MAAM,SAAI,GAAG,MAAM,CAa3E;AAID;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,GAAG,QAAQ,EAAE,CAWrE;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,GAAG,QAAQ,EAAE,CAS5E;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,GAAG,QAAQ,EAAE,CAUzE;AASD;;;GAGG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE,CAgCvD;AAID;;;;GAIG;AACH,wBAAgB,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,gBAAgB,EAAE,MAAM,GAAG,QAAQ,EAAE,CA0B1F;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE,CAU/D;AAoBD;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,QAAQ,GAAG,QAAQ,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,IAAI,CAShF;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,QAAQ,GAAG,QAAQ,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,IAAI,CAS1F;AAID;;;GAGG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,GAAE,MAAM,EAAa,GAAG,OAAO,CAYhF;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,QAAQ,GAAG,QAAQ,EAAE,GAAG,QAAQ,GAAG,QAAQ,EAAE,CAQtF;AAID,KAAK,QAAQ,GAAG,MAAM,EAAE,CAAC;AAMzB;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,EAChC,KAAK,CAAC,EAAE,QAAQ,GACf,QAAQ,GAAG,QAAQ,EAAE,CAgBvB;AAID;;;;GAIG;AACH,wBAAgB,kCAAkC,CAChD,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,GAC/B,QAAQ,GAAG,QAAQ,EAAE,CAYvB;AAID;;;;GAIG;AACH,wBAAgB,kCAAkC,CAChD,QAAQ,EAAE,QAAQ,EAAE,EACpB,cAAc,EAAE,MAAM,EACtB,UAAU,SAAI,GACb,QAAQ,EAAE,CAkBZ;AAID;;;GAGG;AACH,wBAAgB,yBAAyB,CACvC,IAAI,EAAE,QAAQ,EAAE,GAAG,MAAM,GACxB,QAAQ,EAAE,GAAG,MAAM,GAAG,IAAI,CAgB5B;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE,CAQ7D;AAWD,wBAAgB,SAAS,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,GAAG,CAAC,CAGxC"}