full-json-extractor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,48 @@
1
+ # Robust Top-Level JSON Extractor
2
+
3
+ ## Overview
4
+
5
+ This library extracts only the **highest-level valid JSON objects** from noisy or malformed text streams. It is designed to handle input where JSON objects may be surrounded by extra braces, log headers, or partially malformed data, while still guaranteeing that every extracted object was parsed successfully by `JSON.parse`.
6
+
7
+ This package is intended for payloads under 1 MB containing reasonably well-formed JSON, because the worst-case time complexity is O(n^3) even with the pruning techniques it applies.
8
+
9
+ ---
10
+
11
+ ## Features
12
+
13
+ - Extracts multiple valid top-level JSON objects from a single string.
14
+ - Ignores unmatched braces, extra characters, or malformed sections.
15
+ - Works with escaped quotes inside JSON strings.
16
+ - Returns a list of parsed JSON objects found in the original input.
17
+
18
+ ---
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ npm install full-json-extractor
24
+ ```
25
+
26
+ ---
27
+
28
+ ## Usage
29
+
30
+ ```
31
+ import { extractJsons } from 'full-json-extractor';
32
+
33
+ const rawString = `[log header] { "id": 1, "data": { "key": "value" } } more text`;
34
+ const jsonObjects = extractJsons(rawString);
35
+
36
+ console.log(jsonObjects);
37
+ // Output: [{ id: 1, data: { key: 'value' } }]
38
+ ```
39
+
40
+ ---
41
+
42
+ ## API
43
+
44
+ ```
45
+ extractJsons(input: string, limit: Limit = "none"): object[]
46
+ ```
47
+
48
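+ Wrapper around the internal BFS/memoization routine that finds all valid top-level JSON intervals in `input` and returns the parsed objects. `limit` controls the brace pre-check: `"none"` (the default) scans each candidate fully, while `"log2"` stops the pre-check after about log2(n) characters.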
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=benchmark.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../src/benchmark.ts"],"names":[],"mappings":""}
@@ -0,0 +1,49 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ const node_perf_hooks_1 = require("node:perf_hooks");
7
+ const node_process_1 = __importDefault(require("node:process"));
8
+ const extractor_js_1 = require("./extractor.js");
9
/**
 * Builds a benchmark fixture: `{ value: "test" }` wrapped in `depth`
 * layers of `{ nested: ... }`.
 *
 * @param depth number of wrapping layers; 0 returns the bare leaf object
 * @returns the outermost object
 */
function generateNestedJSON(depth) {
    let wrapped = { value: "test" };
    let layersLeft = depth;
    while (layersLeft > 0) {
        wrapped = { nested: wrapped };
        layersLeft -= 1;
    }
    return wrapped;
}
16
/**
 * Builds a benchmark input string: `count` serialized nested objects, each
 * carrying a `padding` string of `sizeFactor` characters, joined by filler text.
 *
 * @param depth nesting depth passed to generateNestedJSON
 * @param count number of JSON documents to emit
 * @param sizeFactor length of the "x" padding added to every document
 * @returns the concatenated raw string
 */
function generateRawString(depth, count, sizeFactor) {
    const serialized = [];
    let produced = 0;
    while (produced < count) {
        const payload = generateNestedJSON(depth);
        payload.padding = "x".repeat(sizeFactor);
        serialized.push(JSON.stringify(payload));
        produced += 1;
    }
    return serialized.join(" some text in between ");
}
25
/**
 * Reports the current V8 heap usage in megabytes, rounded to two decimals.
 *
 * @returns heapUsed converted from bytes to MB
 */
function getMemoryUsageMB() {
    const BYTES_PER_MB = 1024 * 1024;
    const { heapUsed } = node_process_1.default.memoryUsage();
    return Math.round((heapUsed / BYTES_PER_MB) * 100) / 100;
}
29
/**
 * Runs extractJsons over every depth / padding-size / count combination and
 * logs elapsed time, heap delta and serialized output length for each run.
 */
async function runBenchmark() {
    const depths = [1, 3, 5, 10, 20];
    const sizes = [0, 1000, 10000, 50_000]; // padding sizes
    const counts = [1, 5, 10, 20]; // number of JSONs per string
    // Flatten the three parameter lists into one ordered scenario list
    // (depth outermost, count innermost).
    const scenarios = depths.flatMap((depth) => sizes.flatMap((size) => counts.map((count) => ({ depth, size, count }))));
    for (const { depth, size, count } of scenarios) {
        const input = generateRawString(depth, count, size);
        const heapBefore = getMemoryUsageMB();
        const startedAt = node_perf_hooks_1.performance.now();
        const extracted = (0, extractor_js_1.extractJsons)(input);
        const elapsedMs = node_perf_hooks_1.performance.now() - startedAt;
        const heapDelta = getMemoryUsageMB() - heapBefore;
        console.log([
            `Depth=${depth}, Size=${size}, Count=${count}`,
            `Time=${elapsedMs.toFixed(3)} ms`,
            `MemΔ=${heapDelta.toFixed(3)} MB`,
            `OutputLen=${JSON.stringify(extracted).length}`,
        ].join(" | "));
    }
}
runBenchmark();
49
+ //# sourceMappingURL=benchmark.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"benchmark.js","sourceRoot":"","sources":["../src/benchmark.ts"],"names":[],"mappings":";;;;;AAAA,qDAA8C;AAC9C,gEAAmC;AACnC,iDAA8C;AAE9C,SAAS,kBAAkB,CAAC,KAAa;IACvC,IAAI,GAAG,GAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/B,GAAG,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC;IACxB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,iBAAiB,CACxB,KAAa,EACb,KAAa,EACb,UAAkB;IAElB,MAAM,KAAK,GAAG,EAAE,CAAC;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/B,MAAM,IAAI,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;QACtC,IAAY,CAAC,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAC/C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,gBAAgB;IACvB,MAAM,IAAI,GAAG,sBAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,CAAC;IAC1D,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;AACtC,CAAC;AAED,KAAK,UAAU,YAAY;IACzB,MAAM,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;IACjC,MAAM,KAAK,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,gBAAgB;IACxD,MAAM,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,6BAA6B;IAE5D,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,KAAK,GAAG,iBAAiB,CAAC,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;gBAEpD,MAAM,SAAS,GAAG,gBAAgB,EAAE,CAAC;gBACrC,MAAM,KAAK,GAAG,6BAAW,CAAC,GAAG,EAAE,CAAC;gBAEhC,MAAM,MAAM,GAAG,IAAA,2BAAY,EAAC,KAAK,CAAC,CAAC;gBAEnC,MAAM,GAAG,GAAG,6BAAW,CAAC,GAAG,EAAE,CAAC;gBAC9B,MAAM,QAAQ,GAAG,gBAAgB,EAAE,CAAC;gBAEpC,OAAO,CAAC,GAAG,CACT,SAAS,KAAK,UAAU,IAAI,WAAW,KAAK,WAAW,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ;oBACrF,QAAQ,CAAC,QAAQ,GAAG,SAAS,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mBAAmB,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,MAAM,EAAE,CAC9F,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC;AAED,YAAY,EAAE,CAAC"}
@@ -0,0 +1,10 @@
1
+ import { Limit } from "./interfaces";
2
+ /**
3
+ * Extracts json objects from a given input string
4
+ * @param input input string
5
+ * @param limit Sets pre-check behavior. If set to 'log2', method will terminate pre-check after reaching log2(n) characters. Useful for malformed data i.e. many {}
6
+ * Else, will do a O(n) scan to coarsely validate brace matches. Useful for many json objects (i.e. early termination)
7
+ * @returns
8
+ */
9
+ export declare function extractJsons(input: string, limit?: Limit): object[];
10
+ //# sourceMappingURL=extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../src/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,KAAK,EAEN,MAAM,cAAc,CAAC;AAgMtB;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,GAAE,KAAc,GAAG,MAAM,EAAE,CAW3E"}
@@ -0,0 +1,180 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.extractJsons = extractJsons;
7
+ const queue_1 = require("./queue");
8
+ const interval_tree_1d_1 = __importDefault(require("interval-tree-1d"));
9
+ const LBRACE = "{";
10
+ const RBRACE = "}";
11
/**
 * Error thrown by the extractor itself (as opposed to the SyntaxError raised
 * by JSON.parse). The `name` field is set so stack traces and logs identify
 * the error type instead of showing a generic "Error".
 */
class JsonParseError extends Error {
    name = "JsonParseError";
}
13
/**
 * Serializes a candidate position into the string key used by the memo Set.
 *
 * @param memoPosition object with numeric `left` and `right` fields
 * @returns "<left>-<right>"
 */
function convertMemoPositionToKey(memoPosition) {
    const { left, right } = memoPosition;
    return "".concat(left, "-", right);
}
16
/**
 * Records the offset of every opening and closing brace in `input`.
 *
 * Braces are collected purely by character, so braces that appear inside
 * string literals are recorded as well.
 *
 * @param input raw text to scan
 * @returns `prefix`: ascending offsets of every "{"; `suffix`: ascending
 *          offsets of every "}"
 */
function generateBracesPrefixAndSufix(input) {
    const prefix = [];
    const suffix = [];
    for (let i = 0; i < input.length; i++) {
        const char = input[i];
        if (char === LBRACE) {
            prefix.push(i);
        }
        else if (char === RBRACE) {
            suffix.push(i);
        }
    }
    return { prefix, suffix };
}
35
/**
 * Reports whether the tree already holds an interval that strictly encloses
 * [low, high] (i.e. starts before `low` and ends after `high`).
 *
 * @param tree interval tree exposing queryInterval(lo, hi, visitor)
 * @param low start offset of the candidate
 * @param high end offset of the candidate
 * @returns true when an enclosing interval was found
 */
function queryIntervalSync(tree, low, high) {
    let intervalExists = false;
    tree.queryInterval(low, high, ([left, right]) => {
        if (left < low && high < right) {
            intervalExists = true;
            // A truthy return value asks the tree to stop visiting further
            // intervals; one enclosing interval is enough.
            return true;
        }
        return false;
    });
    return intervalExists;
}
46
/**
 * Coarse pre-check run before JSON.parse: walks input[left..right] tracking
 * brace depth while skipping string contents and backslash-escaped characters.
 *
 * Returns false as soon as the depth goes negative or a second top-level
 * object starts/continues after the first one has closed. Returns true
 * early, without a verdict on the rest, once the scan reaches the
 * threshold computed by generateLimit.
 *
 * @param input full raw text
 * @param left offset of the candidate's opening brace
 * @param right offset of the candidate's closing brace (inclusive)
 * @param limit pre-check mode forwarded to generateLimit
 * @returns whether the slice is still a plausible single JSON object
 */
function isBalancedWithOneJson(input, left, right, limit) {
    const stopAt = generateLimit(input, left, limit);
    let depth = 0;
    let insideString = false;
    let skipNext = false;
    let sawCompleteObject = false;
    let cursor = left;
    while (cursor <= right) {
        if (cursor >= stopAt) {
            return true;
        }
        const ch = input[cursor];
        cursor += 1;
        if (skipNext) {
            // Previous character was a backslash: this one is escaped.
            skipNext = false;
            continue;
        }
        switch (ch) {
            case "\\":
                skipNext = true;
                continue;
            case '"':
                insideString = !insideString;
                continue;
        }
        if (insideString) {
            continue;
        }
        if (ch === LBRACE) {
            depth += 1;
        }
        else if (ch === RBRACE) {
            depth -= 1;
        }
        if (depth < 0) {
            return false;
        }
        if (depth === 0) {
            // Depth zero is allowed exactly once (the end of the first
            // object); anything after that means extra content.
            if (sawCompleteObject) {
                return false;
            }
            sawCompleteObject = true;
        }
    }
    return depth === 0;
}
97
/**
 * Breadth-first search over (opening-brace index, closing-brace index) pairs.
 *
 * Starts from the widest pair (first "{", last "}") and, whenever a candidate
 * fails, schedules the two candidates obtained by moving the closing brace one
 * step left or the opening brace one step right. A Set of position keys keeps
 * each pair from being scheduled twice, and an interval tree records accepted
 * ranges so that candidates lying strictly inside one are skipped.
 *
 * @param braceLocations `prefix`/`suffix` offsets of opening/closing braces
 * @param input full raw text
 * @param limit pre-check mode forwarded to isBalancedWithOneJson
 * @returns every value successfully parsed from an accepted candidate
 */
function findValidJsons({ prefix, suffix }, input, limit) {
    const accepted = (0, interval_tree_1d_1.default)();
    const root = { left: 0, right: suffix.length - 1 };
    const pending = new queue_1.Queue(root);
    const seen = new Set([convertMemoPositionToKey(root)]);
    const found = [];
    // Enqueue a candidate unless the same pair was already scheduled.
    const schedule = (candidate) => {
        const key = convertMemoPositionToKey(candidate);
        if (seen.has(key)) {
            return;
        }
        pending.enqueue(candidate);
        seen.add(key);
    };
    while (pending.length()) {
        const current = pending.dequeue();
        const openAt = prefix[current.left];
        const closeAt = suffix[current.right];
        if (closeAt < openAt) {
            continue;
        }
        if (queryIntervalSync(accepted, openAt, closeAt)) {
            continue;
        }
        let parsedOk = false;
        try {
            if (isBalancedWithOneJson(input, openAt, closeAt, limit)) {
                found.push(JSON.parse(input.slice(openAt, closeAt + 1)));
                accepted.insert([openAt, closeAt]);
                parsedOk = true;
            }
        }
        catch (error) {
            // A SyntaxError only means this slice is not valid JSON; anything
            // else is unexpected and must propagate.
            if (!(error instanceof SyntaxError)) {
                throw error;
            }
        }
        if (parsedOk) {
            continue;
        }
        // Narrow from the right first, then from the left; indices are clamped
        // so they never leave the brace arrays.
        schedule({
            left: current.left,
            right: current.right - 1 >= 0 ? current.right - 1 : current.right,
        });
        schedule({
            left: current.left + 1 < prefix.length ? current.left + 1 : current.left,
            right: current.right,
        });
    }
    return found;
}
153
/**
 * Computes the offset at which the brace pre-check stops scanning.
 *
 * @param input full raw text
 * @param left offset where the scan starts
 * @param limit "log2": stop ceil(log2(input.length)) characters after `left`;
 *              "none": use input.length, which a scan inside the input never reaches
 * @returns the termination offset
 * @throws JsonParseError when `limit` is neither "log2" nor "none"
 */
function generateLimit(input, left, limit) {
    if (limit === "log2") {
        const window = Math.ceil(Math.log2(input.length));
        return left + window;
    }
    if (limit === "none") {
        return input.length;
    }
    throw new JsonParseError("unknown limit type provided");
}
163
/**
 * Extracts the top-level JSON objects embedded in a raw string.
 *
 * @param input raw text that may contain JSON objects mixed with other characters
 * @param limit pre-check mode. "log2" stops the brace pre-check after about
 *              log2(n) characters, which helps on malformed data with many braces.
 *              "none" (default) scans the whole candidate so unbalanced or
 *              multi-object candidates are rejected before parsing.
 * @returns the parsed objects; an empty array when the input is empty or
 *          lacks an opening or a closing brace
 */
function extractJsons(input, limit = "none") {
    const hasContent = Boolean(input?.length);
    if (!hasContent) {
        return [];
    }
    const locations = generateBracesPrefixAndSufix(input);
    const { prefix, suffix } = locations;
    if (prefix.length === 0 || suffix.length === 0) {
        return [];
    }
    return findValidJsons(locations, input, limit);
}
180
+ //# sourceMappingURL=extractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extractor.js","sourceRoot":"","sources":["../src/extractor.ts"],"names":[],"mappings":";;;;;AA4MA,oCAWC;AAjND,mCAAgC;AAChC,wEAA4C;AAE5C,MAAM,MAAM,GAAG,GAAG,CAAC;AACnB,MAAM,MAAM,GAAG,GAAG,CAAC;AAEnB,MAAM,cAAe,SAAQ,KAAK;CAAG;AAErC,SAAS,wBAAwB,CAAC,YAA0B;IAC1D,OAAO,GAAG,YAAY,CAAC,IAAI,IAAI,YAAY,CAAC,KAAK,EAAE,CAAC;AACtD,CAAC;AAED,SAAS,4BAA4B,CAAC,KAAa;IACjD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,YAAY,GAAwB,IAAI,GAAG,EAAE,CAAC;IAEpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,MAAM,EAAE,CAAC;YACvB,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACf,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC;QACrD,CAAC;aAAM,IAAI,KAAK,CAAC,CAAC,CAAC,IAAI,MAAM,EAAE,CAAC;YAC9B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACf,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,OAAO;QACL,MAAM;QACN,MAAM;KACP,CAAC;AACJ,CAAC;AAED,SAAS,iBAAiB,CACxB,IAAsB,EACtB,GAAW,EACX,IAAY;IAEZ,IAAI,cAAc,GAAY,KAAK,CAAC;IACpC,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,QAA0B,EAAE,EAAE;QAC3D,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,GAAG,QAAQ,CAAC;QAC/B,IAAI,IAAI,GAAG,GAAG,IAAI,IAAI,GAAG,KAAK,EAAE,CAAC;YAC/B,cAAc,GAAG,IAAI,CAAC;YACtB,OAAO;QACT,CAAC;IACH,CAAC,CAAC,CAAC;IACH,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,qBAAqB,CAC5B,KAAa,EACb,IAAY,EACZ,KAAa,EACb,KAAY;IAEZ,MAAM,oBAAoB,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;IAC/D,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,IAAI,YAAY,GAAG,IAAI,CAAC;IAExB,KAAK,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,IAAI,CAAC,IAAI,oBAAoB,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC;QACd,CAAC;QACD,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtB,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,GAAG,KAAK,CAAC;YACnB,SAAS;QACX,CAAC;QAED,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YAClB,UAAU,GAAG,IAAI,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;YACjB,QAAQ,GAAG
,CAAC,QAAQ,CAAC;YACrB,SAAS;QACX,CAAC;QAED,IAAI,QAAQ;YAAE,SAAS;QAEvB,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YACpB,UAAU,EAAE,CAAC;QACf,CAAC;aAAM,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YAC3B,UAAU,EAAE,CAAC;QACf,CAAC;QAED,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;YACnB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC,YAAY,EAAE,CAAC;gBAClB,OAAO,KAAK,CAAC;YACf,CAAC;YACD,YAAY,GAAG,CAAC,YAAY,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,OAAO,UAAU,KAAK,CAAC,CAAC;AAC1B,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CACrB,EAAE,MAAM,EAAE,MAAM,EAAqB,EACrC,KAAa,EACb,KAAY;IAEZ,MAAM,IAAI,GAAG,IAAA,0BAAY,GAAE,CAAC;IAC5B,MAAM,gBAAgB,GAAiB;QACrC,IAAI,EAAE,CAAC;QACP,KAAK,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC;KACzB,CAAC;IACF,MAAM,KAAK,GAAwB,IAAI,aAAK,CAAC,gBAAgB,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,IAAI,GAAG,CAAS,CAAC,wBAAwB,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAC3E,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,OAAO,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;QACtB,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,KAAK,CAAC,OAAO,EAAG,CAAC;QAChE,MAAM,YAAY,GAAW,MAAM,CAAC,SAAS,CAAE,CAAC;QAChD,MAAM,aAAa,GAAW,MAAM,CAAC,UAAU,CAAE,CAAC;QAElD,IACE,aAAa,GAAG,YAAY;YAC5B,iBAAiB,CAAC,IAAI,EAAE,YAAY,EAAE,aAAa,CAAC,EACpD,CAAC;YACD,SAAS;QACX,CAAC;QAED,IAAI,CAAC;YACH,IAAI,qBAAqB,CAAC,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,KAAK,CAAC,EAAE,CAAC;gBACrE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,YAAY,EAAE,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;gBACrE,IAAI,CAAC,MAAM,CAAC,CAAC,YAAY,EAAE,aAAa,CAAC,CAAC,CAAC;gBAC3C,SAAS;YACX,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,CAAC,KAAK,YAAY,WAAW,CAAC,EAAE,CAAC;gBACpC,MAAM,KAAuB,CAAC;YAChC,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAmB;YAChC;gBACE,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,UAAU,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU;aACzD;YACD;gBACE,IAAI,EAAE,SAAS,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS;gBAC/D,KAAK,EAAE,UAAU;aAClB;SACF,CAAC;QAEF,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,GAAG,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;YAC/C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,C
AAC;gBACnB,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;gBACxB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAChB,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,IAAY,EAAE,KAAY;IAC9D,QAAQ,KAAK,EAAE,CAAC;QACd,KAAK,MAAM;YACT,OAAO,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;QACnD,KAAK,MAAM;YACT,OAAO,KAAK,CAAC,MAAM,CAAC;QACtB;YACE,MAAM,IAAI,cAAc,CAAC,6BAA6B,CAAC,CAAC;IAC5D,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,YAAY,CAAC,KAAa,EAAE,QAAe,MAAM;IAC/D,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC;QACnB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,SAAS,GAAG,4BAA4B,CAAC,KAAK,CAAC,CAAC;IACtD,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACzD,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,cAAc,CAAC,SAAS,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;AACjD,CAAC"}
@@ -0,0 +1,2 @@
1
+ export * from "./extractor";
2
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,aAAa,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,18 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./extractor"), exports);
18
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,8CAA4B"}
@@ -0,0 +1,34 @@
1
/** Pair of numbers describing an interval; the extractor stores the start and end offsets of an accepted JSON slice this way. */
+ export type Interval = [number, number];
2
/** Node shape of the interval tree. NOTE(review): presumably mirrors the internal node of the `interval-tree-1d` package; confirm against that library. */
+ interface IntervalTreeNode {
3
+ mid: number;
4
+ left: IntervalTreeNode | null;
5
+ right: IntervalTreeNode | null;
6
+ leftPoints: Interval[];
7
+ rightPoints: Interval[];
8
+ count: number;
9
+ intervals(result?: Interval[]): Interval[];
10
+ insert(interval: Interval): void;
11
+ remove(interval: Interval): number;
12
+ queryPoint(x: number, cb: (interval: Interval) => any): any;
13
+ queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
14
+ }
15
/** Typing for the tree object created from `interval-tree-1d`; the extractor only calls `insert` and `queryInterval` on it. */
+ export interface IntervalTreeType {
16
+ root: IntervalTreeNode | null;
17
+ insert(interval: Interval): void;
18
+ remove(interval: Interval): boolean;
19
+ queryPoint(x: number, cb: (interval: Interval) => any): any;
20
/** Invokes `cb` for stored intervals matching the [lo, hi] query. NOTE(review): exact overlap semantics come from the library; verify there. */
+ queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
21
+ readonly count: number;
22
+ readonly intervals: Interval[];
23
+ }
24
/** A search candidate: `left` is an index into the opening-brace list and `right` an index into the closing-brace list. */
+ export interface MemoPosition {
25
+ readonly left: number;
26
+ readonly right: number;
27
+ }
28
/** Brace offsets found in the input: `prefix` holds the offsets of every "{" and `suffix` the offsets of every "}", both in ascending order. */
+ export interface BraceLocationInfo {
29
+ readonly prefix: number[];
30
+ readonly suffix: number[];
31
+ }
32
/** Pre-check mode: "log2" stops the brace pre-check ceil(log2(input.length)) characters after its start; "none" lets it scan the whole candidate. */
+ export type Limit = "log2" | "none";
33
+ export {};
34
+ //# sourceMappingURL=interfaces.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interfaces.d.ts","sourceRoot":"","sources":["../src/interfaces.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAExC,UAAU,gBAAgB;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC9B,KAAK,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC/B,UAAU,EAAE,QAAQ,EAAE,CAAC;IACvB,WAAW,EAAE,QAAQ,EAAE,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE,CAAC;IAC3C,MAAM,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAAC;IACjC,MAAM,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,CAAC;IACnC,UAAU,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,QAAQ,EAAE,QAAQ,KAAK,GAAG,GAAG,GAAG,CAAC;IAC5D,aAAa,CAAC,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,QAAQ,EAAE,QAAQ,KAAK,GAAG,GAAG,GAAG,CAAC;CAC7E;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC9B,MAAM,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAAC;IACjC,MAAM,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC;IACpC,UAAU,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,QAAQ,EAAE,QAAQ,KAAK,GAAG,GAAG,GAAG,CAAC;IAC5D,aAAa,CAAC,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,QAAQ,EAAE,QAAQ,KAAK,GAAG,GAAG,GAAG,CAAC;IAC5E,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,SAAS,EAAE,QAAQ,EAAE,CAAC;CAChC;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG,MAAM,CAAC"}
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=interfaces.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interfaces.js","sourceRoot":"","sources":["../src/interfaces.ts"],"names":[],"mappings":""}
@@ -0,0 +1,10 @@
1
/** FIFO queue backed by two arrays: items are appended to `pushQueue` and served from `popQueue`. */
+ export declare class Queue<T> {
2
+ private popQueue;
3
+ private pushQueue;
4
/** Seeds the queue with the given items, in order. */
+ constructor(...value: T[]);
5
+ private migratePushToPopQueue;
6
/** Appends one or more items to the back of the queue. */
+ enqueue(...value: T[]): void;
7
/** Removes and returns the front item, or `null` when the queue is empty. */
+ dequeue(): T | null;
8
/** Returns the number of items currently queued. */
+ length(): number;
9
+ }
10
+ //# sourceMappingURL=queue.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"queue.d.ts","sourceRoot":"","sources":["../src/queue.ts"],"names":[],"mappings":"AAAA,qBAAa,KAAK,CAAC,CAAC;IAClB,OAAO,CAAC,QAAQ,CAAM;IACtB,OAAO,CAAC,SAAS,CAAM;gBAEX,GAAG,KAAK,EAAE,CAAC,EAAE;IAKzB,OAAO,CAAC,qBAAqB;IAMtB,OAAO,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,GAAG,IAAI;IAI5B,OAAO,IAAI,CAAC,GAAG,IAAI;IAYnB,MAAM,IAAI,MAAM;CAGxB"}
package/dist/queue.js ADDED
@@ -0,0 +1,35 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Queue = void 0;
4
/**
 * FIFO queue implemented with two stacks: new items land on `pushQueue`,
 * and when `popQueue` runs dry the pending items are moved over in reverse
 * so that the oldest item ends up on top.
 */
class Queue {
    popQueue;
    pushQueue;
    /** Seeds the queue with the given items, in order. */
    constructor(...value) {
        this.popQueue = [];
        this.pushQueue = value;
    }
    /** Drains `pushQueue` into `popQueue`, newest first, so pops come out oldest first. */
    migratePushToPopQueue() {
        for (let i = this.pushQueue.length - 1; i >= 0; i--) {
            this.popQueue.push(this.pushQueue[i]);
        }
        this.pushQueue.length = 0;
    }
    /** Appends one or more items to the back of the queue. */
    enqueue(...value) {
        for (const item of value) {
            this.pushQueue.push(item);
        }
    }
    /** Removes and returns the front item, or null when the queue is empty. */
    dequeue() {
        if (this.popQueue.length === 0) {
            this.migratePushToPopQueue();
        }
        return this.popQueue.length === 0 ? null : this.popQueue.pop();
    }
    /** Number of items currently held across both internal arrays. */
    length() {
        return this.pushQueue.length + this.popQueue.length;
    }
}
34
+ exports.Queue = Queue;
35
+ //# sourceMappingURL=queue.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"queue.js","sourceRoot":"","sources":["../src/queue.ts"],"names":[],"mappings":";;;AAAA,MAAa,KAAK;IACR,QAAQ,CAAM;IACd,SAAS,CAAM;IAEvB,YAAY,GAAG,KAAU;QACvB,IAAI,CAAC,QAAQ,GAAG,EAAE,CAAC;QACnB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;IACzB,CAAC;IAEO,qBAAqB;QAC3B,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;YAC7B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAG,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAEM,OAAO,CAAC,GAAG,KAAU;QAC1B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;IAChC,CAAC;IAEM,OAAO;QACZ,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC1B,IAAI,CAAC,qBAAqB,EAAE,CAAC;QAC/B,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAG,CAAC;QAC9B,CAAC;IACH,CAAC;IAEM,MAAM;QACX,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;IACtD,CAAC;CACF;AAlCD,sBAkCC"}
package/jest.config.js ADDED
@@ -0,0 +1,10 @@
1
/** @type {import("jest").Config} **/
export default {
  preset: 'ts-jest',
  testEnvironment: "node",
  transform: {
    // Send TypeScript sources (.ts/.tsx and .mts/.mtsx) through ts-jest in ESM mode.
    // The previous pattern ended in `ts?`, which made the "s" optional instead of
    // allowing an optional trailing "x".
    "^.+\\.m?tsx?$": ["ts-jest", {
      useESM: true
    }]
  }
};
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "full-json-extractor",
3
+ "version": "1.0.0",
4
+ "main": "dist/index.js",
5
+ "scripts": {
6
+ "clean": "rimraf dist build .cache",
7
+ "format": "prettier --check \"src/**/*.{js,ts,tsx,jsx,json,css,md}\"",
8
+ "format:fix": "prettier --write \"src/**/*.{js,ts,tsx,jsx,json,css,md}\"",
9
+ "build": "tsc",
10
+ "test": "jest",
11
+ "release": "npm run format && npm run test && npm run build"
12
+ },
13
+ "keywords": [
14
+ "json",
15
+ "json parse",
16
+ "json extract"
17
+ ],
18
+ "author": "Jonzdan",
19
+ "license": "ISC",
20
+ "description": "Brute-forces all possible highest-level json candidates with pruning to keep performance fast with reasonable payload < 1MB",
21
+ "devDependencies": {
22
+ "@types/jest": "^30.0.0",
23
+ "@types/node": "^24.2.1",
24
+ "jest": "^30.0.5",
25
+ "prettier": "^3.6.2",
26
+ "rimraf": "^6.0.1",
27
+ "ts-jest": "^29.4.1",
28
+ "ts-node": "^10.9.2",
29
+ "typescript": "^5.9.2"
30
+ },
31
+ "dependencies": {
32
+ "interval-tree-1d": "^1.0.4"
33
+ },
34
+ "publishConfig": {
35
+ "registry": "https://registry.npmjs.org/"
36
+ }
37
+ }
@@ -0,0 +1,171 @@
1
+ import { extractJsons } from "../extractor";
2
+
3
describe("extractJsonIntervals.test", () => {
  /** Builds the `{ sample_id, data: { key } }` object most cases expect. */
  const sample = (id: number, key = "xdsc"): object => ({
    sample_id: id,
    data: { key },
  });

  /** Asserts that every wanted object appears somewhere in the extracted list. */
  const expectAllExtracted = (actual: object[], wanted: object[]): void => {
    wanted.forEach((item) => expect(actual).toContainEqual(item));
  };

  it("only json, valid json, 1-depth", () => {
    const [first] = extractJsons(
      `{"sample_id": 1, "data": { "key": "xdsc" }}`,
    );
    expect(first).toEqual(sample(1));
  });

  it("only json, invalid json, 0-depth", () => {
    expect(extractJsons("{key: 1}")).toEqual([]);
  });

  it("raw string, invalid json, 2-depth", () => {
    expect(
      extractJsons('[hi] { "outer": { "inner": { key: 1 } } }'),
    ).toEqual([]);
  });

  it("raw string, valid json, 1-depth", () => {
    const [first] = extractJsons(
      `[hi] {"sample_id": 1, "data": { "key": "xdsc" }}`,
    );
    expect(first).toEqual(sample(1));
  });

  it("raw string, valid escaped json, 1-depth", () => {
    const [first] = extractJsons(
      `[hi] "{\"sample_id\": 1, \"data\": { \"key\": \"xdsc\" }}"`,
    );
    expect(first).toEqual(sample(1));
  });

  it("raw string, valid json, 1-depth, 3 objects", () => {
    const input = `[hi] {"sample_id": 1, "data": { "key": "xdsc" }} {"sample_id": 2, "data": { "key": "xdsc" }} {"sample_id": 3, "data": { "key": "xdsc" }}`;
    expectAllExtracted(extractJsons(input), [sample(1), sample(2), sample(3)]);
  });

  it("raw string, valid json, 1-depth, 3 objects, extra {", () => {
    const input = `[hi] {{"sample_id": 1, "data": { "key": "xdsc" }} {"sample_id": 2, "data": { "key": "xdsc" }} {"sample_id": 3, "data": { "key": "xdsc" }}`;
    expectAllExtracted(extractJsons(input), [sample(1), sample(2), sample(3)]);
  });

  it("raw string, valid json, 1-depth, 3 objects, extra {, {} in string", () => {
    const input = `[hi] {{"sample_id": 1, "data": { "key": "x}dsc" }} {"sample_id": 2, "data": { "key": "xd{sc" }} {"sample_id": 3, "data": { "key": "xdsc" }}`;
    expectAllExtracted(extractJsons(input), [
      sample(1, "x}dsc"),
      sample(2, "xd{sc"),
      sample(3),
    ]);
  });

  it("raw string, valid json, 2-depth, 150 objects, extra {, {} in string", () => {
    const ids = Array.from({ length: 150 }, (_, i) => i);
    const input = ids
      .map(
        (i) =>
          `[hi] {{"sample_id": ${i}, "data": { "key": "x}ds{c", "subdata": { "key": 2 }}}`,
      )
      .join("");
    const wanted: object[] = ids.map((i) => ({
      sample_id: i,
      data: {
        key: "x}ds{c",
        subdata: { key: 2 },
      },
    }));

    const result = extractJsons(input);
    expect(result.length).toEqual(150);
    expectAllExtracted(result, wanted);
  });
});
@@ -0,0 +1,59 @@
1
+ import { performance } from "node:perf_hooks";
2
+ import process from "node:process";
3
+ import { extractJsons } from "./extractor.js";
4
+
5
/**
 * Builds a benchmark fixture: `{ value: "test" }` wrapped in `depth`
 * layers of `{ nested: ... }`.
 *
 * @param depth number of wrapping layers; 0 returns the bare leaf object
 * @returns the outermost object
 */
function generateNestedJSON(depth: number): object {
  // Typed as a string-keyed record instead of `any` so the checker still
  // validates every assignment.
  let nested: Record<string, unknown> = { value: "test" };
  for (let level = 0; level < depth; level++) {
    nested = { nested };
  }
  return nested;
}
12
+
13
/**
 * Builds a benchmark input string: `count` serialized nested objects, each
 * carrying a `padding` string of `sizeFactor` characters, joined by filler text.
 *
 * @param depth nesting depth passed to generateNestedJSON
 * @param count number of JSON documents to emit
 * @param sizeFactor length of the "x" padding added to every document
 * @returns the concatenated raw string
 */
function generateRawString(
  depth: number,
  count: number,
  sizeFactor: number,
): string {
  const jsons: string[] = [];
  for (let i = 0; i < count; i++) {
    // Spread into a fresh object rather than mutating through an `any` cast;
    // `padding` still serializes as the last key.
    jsons.push(
      JSON.stringify({
        ...generateNestedJSON(depth),
        padding: "x".repeat(sizeFactor),
      }),
    );
  }
  return jsons.join(" some text in between ");
}
26
+
27
/**
 * Reports the current V8 heap usage in megabytes, rounded to two decimals.
 *
 * @returns heapUsed converted from bytes to MB
 */
function getMemoryUsageMB(): number {
  const bytesPerMegabyte = 1024 * 1024;
  const { heapUsed } = process.memoryUsage();
  return Math.round((heapUsed / bytesPerMegabyte) * 100) / 100;
}
31
+
32
/**
 * Runs extractJsons over every depth / padding-size / count combination and
 * logs elapsed time, heap delta and serialized output length for each run.
 */
async function runBenchmark() {
  const depths = [1, 3, 5, 10, 20];
  const sizes = [0, 1000, 10000, 50_000]; // padding sizes
  const counts = [1, 5, 10, 20]; // number of JSONs per string

  // Flatten the three parameter lists into one ordered scenario list
  // (depth outermost, count innermost).
  const scenarios = depths.flatMap((depth) =>
    sizes.flatMap((size) => counts.map((count) => ({ depth, size, count }))),
  );

  for (const { depth, size, count } of scenarios) {
    const input = generateRawString(depth, count, size);

    const heapBefore = getMemoryUsageMB();
    const startedAt = performance.now();

    const extracted = extractJsons(input);

    const elapsedMs = performance.now() - startedAt;
    const heapDelta = getMemoryUsageMB() - heapBefore;

    console.log(
      [
        `Depth=${depth}, Size=${size}, Count=${count}`,
        `Time=${elapsedMs.toFixed(3)} ms`,
        `MemΔ=${heapDelta.toFixed(3)} MB`,
        `OutputLen=${JSON.stringify(extracted).length}`,
      ].join(" | "),
    );
  }
}

runBenchmark();
@@ -0,0 +1,208 @@
1
+ import {
2
+ BraceLocationInfo,
3
+ IntervalTreeType,
4
+ Limit,
5
+ MemoPosition,
6
+ } from "./interfaces";
7
+ import { Queue } from "./queue";
8
+ import IntervalTree from "interval-tree-1d";
9
+
10
/** Character that opens a JSON object candidate. */
const LBRACE = "{";
/** Character that closes a JSON object candidate. */
const RBRACE = "}";
12
+
13
/**
 * Error raised by the extractor for failures that are not JSON syntax
 * problems (for example an unrecognised limit option).
 */
class JsonExtractError extends Error {
  constructor(message?: string) {
    super(message);
    // Without this the inherited `name` stays "Error", which hides the
    // error's origin in logs and stack traces.
    this.name = "JsonExtractError";
  }
}
14
+
15
/**
 * Serializes a pair of brace indices into the string key used by the
 * visited-set of the search.
 *
 * @param memoPosition pair of indices to encode
 * @returns `"<left>-<right>"`
 */
function convertMemoPositionToKey(memoPosition: MemoPosition) {
  const { left, right } = memoPosition;
  return left + "-" + right;
}
18
+
19
/**
 * Records where every brace character occurs in `input`.
 *
 * The scan is purely lexical: braces that appear inside JSON string literals
 * are recorded as well; later stages are responsible for rejecting them.
 *
 * @param input raw text to scan
 * @returns `prefix` with the ascending positions of every "{" and `suffix`
 *          with the ascending positions of every "}"
 */
function generateBracesPrefixAndSufix(input: string): BraceLocationInfo {
  const prefix: number[] = [];
  const suffix: number[] = [];

  for (let i = 0; i < input.length; i++) {
    const char = input[i];
    if (char === LBRACE) {
      prefix.push(i);
    } else if (char === RBRACE) {
      suffix.push(i);
    }
  }

  return {
    prefix,
    suffix,
  };
}
39
+
40
/**
 * Tells whether the tree already holds an interval that strictly encloses
 * `[low, high]`, i.e. starts before `low` and ends after `high`.
 *
 * @param tree interval tree of already accepted intervals
 * @param low start of the candidate interval
 * @param high end of the candidate interval
 * @returns true when a strictly enclosing interval exists
 */
function queryIntervalSync(
  tree: IntervalTreeType,
  low: number,
  high: number,
): boolean {
  let intervalExists = false;
  tree.queryInterval(low, high, ([left, right]: [number, number]) => {
    if (left < low && high < right) {
      intervalExists = true;
    }
    // interval-tree-1d stops visiting as soon as the visitor returns a truthy
    // value, so returning the flag ends the traversal at the first match
    // instead of walking every overlapping interval.
    return intervalExists;
  });
  return intervalExists;
}
55
+
56
/**
 * Coarse pre-check to filter out invalid json candidates. Short circuits if
 * >1 json candidates exist in slice.
 *
 * Walks `input[left..right]`, tracking brace depth while ignoring braces
 * inside double-quoted strings and skipping the character that follows any
 * backslash. The slice is rejected when the depth goes negative or when a
 * second point of zero depth is reached; it is accepted without further
 * scanning once the scan position reaches the cutoff derived from `limit`.
 *
 * @param input full text being searched
 * @param left index of the first character of the candidate slice
 * @param right index of the last character of the candidate slice (inclusive)
 * @param limit pre-check mode used to compute the scan cutoff
 * @returns true when the slice is worth handing to `JSON.parse`
 */
function isBalancedWithOneJson(
  input: string,
  left: number,
  right: number,
  limit: Limit,
): boolean {
  const scanCutoff = generateLimit(input, left, limit);
  let depth = 0;
  let insideString = false;
  let skipCurrent = false;
  let closedOnce = false;

  for (let pos = left; pos <= right; pos++) {
    // Past the cutoff the candidate is optimistically accepted.
    if (pos >= scanCutoff) {
      return true;
    }

    if (skipCurrent) {
      skipCurrent = false;
      continue;
    }

    const ch = input[pos];

    if (ch === "\\") {
      skipCurrent = true;
      continue;
    }

    if (ch === '"') {
      insideString = !insideString;
      continue;
    }

    if (insideString) {
      continue;
    }

    if (ch === LBRACE) {
      depth += 1;
    } else if (ch === RBRACE) {
      depth -= 1;
    }

    if (depth < 0) {
      return false;
    }

    if (depth !== 0) {
      continue;
    }

    // Depth is zero: the first time is the expected close of the candidate,
    // any later time means more than one top-level run lives in the slice.
    if (closedOnce) {
      return false;
    }
    closedOnce = true;
  }

  return depth === 0;
}
118
+
119
/**
 * Breadth-first search over (opening brace, closing brace) index pairs that
 * collects every slice of `input` which parses as JSON and is not strictly
 * enclosed by a slice that was already accepted.
 *
 * The search starts at the first "{" and the last "}", and each rejected pair
 * spawns two narrower pairs: one with the previous "}" and one with the next
 * "{". A set of visited pairs prevents re-processing.
 *
 * @param param0 brace positions as produced by generateBracesPrefixAndSufix
 * @param input full text being searched
 * @param limit pre-check mode forwarded to isBalancedWithOneJson
 * @returns the parsed values, in discovery order
 */
function findValidJsons(
  { prefix, suffix }: BraceLocationInfo,
  input: string,
  limit: Limit,
): object[] {
  const accepted = IntervalTree();
  const origin: MemoPosition = {
    left: 0,
    right: suffix.length - 1,
  };
  const pending: Queue<MemoPosition> = new Queue(origin);
  const visited = new Set<string>([convertMemoPositionToKey(origin)]);
  const found: object[] = [];

  while (pending.length()) {
    const current = pending.dequeue()!;
    const openAt: number = prefix[current.left]!;
    const closeAt: number = suffix[current.right]!;

    // A closing brace before the opening one can never delimit an object.
    if (closeAt < openAt) {
      continue;
    }
    // Skip anything nested inside an already accepted object.
    if (queryIntervalSync(accepted, openAt, closeAt)) {
      continue;
    }

    try {
      if (isBalancedWithOneJson(input, openAt, closeAt, limit)) {
        found.push(JSON.parse(input.slice(openAt, closeAt + 1)));
        accepted.insert([openAt, closeAt]);
        continue;
      }
    } catch (error) {
      // A SyntaxError only means this slice is not JSON; keep narrowing.
      if (!(error instanceof SyntaxError)) {
        throw error as JsonExtractError;
      }
    }

    const shrinkFromRight: MemoPosition = {
      left: current.left,
      right: current.right >= 1 ? current.right - 1 : current.right,
    };
    const advanceFromLeft: MemoPosition = {
      left: current.left + 1 < prefix.length ? current.left + 1 : current.left,
      right: current.right,
    };

    for (const next of [shrinkFromRight, advanceFromLeft]) {
      const key = convertMemoPositionToKey(next);
      if (visited.has(key)) {
        continue;
      }
      pending.enqueue(next);
      visited.add(key);
    }
  }
  return found;
}
178
+
179
/**
 * Computes the index at which the balance pre-check stops scanning.
 *
 * @param input full text being searched
 * @param left index where the candidate slice begins
 * @param limit "none" scans to the end of `input`; "log2" scans
 *              `ceil(log2(input.length))` characters past `left`
 * @returns the first index that the pre-check will not inspect
 * @throws JsonExtractError when `limit` is neither "log2" nor "none"
 */
function generateLimit(input: string, left: number, limit: Limit) {
  if (limit === "none") {
    return input.length;
  }
  if (limit === "log2") {
    return left + Math.ceil(Math.log2(input.length));
  }
  throw new JsonExtractError("unknown limit type provided");
}
189
+
190
/**
 * Extracts json objects from a given input string.
 *
 * @param input input string
 * @param limit Sets pre-check behavior. With "log2" the pre-check gives up
 *              after roughly log2(n) characters, which helps on large
 *              malformed data (many braces mixed with non-json text). With
 *              "none" (default) the pre-check scans the whole candidate to
 *              coarsely validate brace matching, which helps when the input
 *              holds many json objects (early termination).
 * @returns array of JSON objects; empty when the input is empty or lacks
 *          either an opening or a closing brace
 */
export function extractJsons(input: string, limit: Limit = "none"): object[] {
  if (!input?.length) {
    return [];
  }

  const braces = generateBracesPrefixAndSufix(input);
  const hasBothKinds = braces.prefix.length > 0 && braces.suffix.length > 0;

  return hasBothKinds ? findValidJsons(braces, input, limit) : [];
}
package/src/index.ts ADDED
@@ -0,0 +1 @@
1
+ export * from "./extractor";
@@ -0,0 +1,37 @@
1
/** Pair of numbers `[start, end]` describing one interval. */
export type Interval = [number, number];

/**
 * Shape of a single tree node. Duplicates the node declaration found in the
 * ambient typings for `interval-tree-1d`.
 */
interface IntervalTreeNode {
  mid: number;
  left: IntervalTreeNode | null;
  right: IntervalTreeNode | null;
  leftPoints: Interval[];
  rightPoints: Interval[];
  count: number;
  intervals(result?: Interval[]): Interval[];
  insert(interval: Interval): void;
  remove(interval: Interval): number;
  queryPoint(x: number, cb: (interval: Interval) => any): any;
  queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
}

/**
 * Shape of the tree object returned by the `interval-tree-1d` factory, as
 * consumed by the extractor. Duplicates the tree declaration found in the
 * ambient typings for that package.
 */
export interface IntervalTreeType {
  root: IntervalTreeNode | null;
  insert(interval: Interval): void;
  remove(interval: Interval): boolean;
  queryPoint(x: number, cb: (interval: Interval) => any): any;
  /** Invokes `cb` for intervals overlapping `[lo, hi]`. */
  queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
  readonly count: number;
  readonly intervals: Interval[];
}
26
+
27
/**
 * One state of the extractor's search: a pair of indices into the brace
 * position lists, not character offsets into the input.
 */
export interface MemoPosition {
  /** Index into the list of opening-brace positions. */
  readonly left: number;
  /** Index into the list of closing-brace positions. */
  readonly right: number;
}
31
+
32
/** Character offsets of every brace found in the scanned input. */
export interface BraceLocationInfo {
  /** Offsets of each "{" in ascending order. */
  readonly prefix: number[];
  /** Offsets of each "}" in ascending order. */
  readonly suffix: number[];
}
36
+
37
/**
 * Pre-check mode: "log2" caps the balance scan at about log2(n) characters,
 * "none" lets it run to the end of the input.
 */
export type Limit = "log2" | "none";
package/src/queue.ts ADDED
@@ -0,0 +1,35 @@
1
/**
 * FIFO queue backed by two arrays: new items are appended to an inbox and
 * moved, in reversed order, to an outbox whenever the outbox runs dry, so
 * that the oldest item is always at the end of the outbox.
 */
export class Queue<T> {
  private outbox: T[] = [];
  private inbox: T[];

  /** @param value initial items, oldest first */
  constructor(...value: T[]) {
    this.inbox = value;
  }

  /** Moves every inbox item to the outbox; only called when the outbox is empty. */
  private refill(): void {
    this.outbox = this.inbox.reverse();
    this.inbox = [];
  }

  /** Appends the given items to the back of the queue, in argument order. */
  public enqueue(...value: T[]): void {
    for (const item of value) {
      this.inbox.push(item);
    }
  }

  /** Removes and returns the oldest item, or `null` when the queue is empty. */
  public dequeue(): T | null {
    if (this.outbox.length === 0) {
      this.refill();
    }

    return this.outbox.length > 0 ? this.outbox.pop()! : null;
  }

  /** Number of items currently held. */
  public length(): number {
    return this.inbox.length + this.outbox.length;
  }
}
@@ -0,0 +1,31 @@
1
/**
 * Ambient typings for the `interval-tree-1d` package, whose default export is
 * a factory function returning a tree.
 */
declare module "interval-tree-1d" {
  /** Pair of numbers `[start, end]` describing one interval. */
  type Interval = [number, number];

  /** Shape of a single node inside the tree. */
  interface IntervalTreeNode {
    mid: number;
    left: IntervalTreeNode | null;
    right: IntervalTreeNode | null;
    leftPoints: Interval[];
    rightPoints: Interval[];
    count: number;
    intervals(result?: Interval[]): Interval[];
    insert(interval: Interval): void;
    remove(interval: Interval): number;
    queryPoint(x: number, cb: (interval: Interval) => any): any;
    queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
  }

  /** Tree object produced by the factory. */
  interface IntervalTree {
    root: IntervalTreeNode | null;
    insert(interval: Interval): void;
    remove(interval: Interval): boolean;
    queryPoint(x: number, cb: (interval: Interval) => any): any;
    queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
    readonly count: number;
    readonly intervals: Interval[];
  }

  /** Creates a tree, optionally seeded with `intervals`. */
  function createWrapper(intervals?: Interval[]): IntervalTree;

  export = createWrapper;
}
package/tsconfig.json ADDED
@@ -0,0 +1,54 @@
1
+ {
2
+ // Visit https://aka.ms/tsconfig to read more about this file
3
+ "compilerOptions": {
4
+ // File Layout
5
+ "rootDir": "src",
6
+ "outDir": "./dist",
7
+ // Environment Settings
8
+ // See also https://aka.ms/tsconfig/module
9
+ "module": "commonjs",
10
+ "target": "esnext",
11
+ "types": ["@types/jest", "@types/node"],
12
+ "moduleResolution": "node",
13
+ // For nodejs:
14
+ // "lib": ["esnext"],
15
+ // "types": ["node"],
16
+ // and npm install -D @types/node
17
+
18
+ // Other Outputs
19
+ "sourceMap": true,
20
+ "declaration": true,
21
+ "declarationMap": true,
22
+
23
+ // Stricter Typechecking Options
24
+ "noUncheckedIndexedAccess": true,
25
+ "exactOptionalPropertyTypes": true,
26
+
27
+ // Style Options
28
+ "noImplicitReturns": true,
29
+ "noImplicitOverride": true,
30
+ "noUnusedLocals": true,
31
+ "noUnusedParameters": true,
32
+ "noFallthroughCasesInSwitch": true,
33
+ "noPropertyAccessFromIndexSignature": true,
34
+
35
+ // Recommended Options
36
+ "esModuleInterop": true,
37
+ "strict": true,
38
+ "isolatedModules": true,
39
+ "forceConsistentCasingInFileNames": true,
40
+ "noUncheckedSideEffectImports": true,
41
+ "moduleDetection": "force",
42
+ "skipLibCheck": true,
43
+ "paths": {
44
+ "src/*": ["./src/*"]
45
+ }
46
+ },
47
+ "include": [
48
+ "src/**/*"
49
+ ],
50
+ "exclude": [
51
+ "node_modules",
52
+ "src/**/__tests__"
53
+ ]
54
+ }