full-json-extractor 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/extractor.ts CHANGED
@@ -1,208 +1,228 @@
1
+ import IntervalTree from "interval-tree-1d";
2
+
1
3
  import {
2
- BraceLocationInfo,
3
- IntervalTreeType,
4
- Limit,
5
- MemoPosition,
4
+ BraceLocationInfo,
5
+ IntervalTreeType,
6
+ Limit,
7
+ MemoPosition,
6
8
  } from "./interfaces";
7
- import { Queue } from "./queue";
8
- import IntervalTree from "interval-tree-1d";
9
+ import Denque from "denque";
9
10
 
10
11
  const LBRACE = "{";
11
12
  const RBRACE = "}";
12
13
 
13
14
  class JsonExtractError extends Error {}
14
15
 
15
- function convertMemoPositionToKey(memoPosition: MemoPosition) {
16
- return `${memoPosition.left}-${memoPosition.right}`;
17
- }
18
-
19
- function generateBracesPrefixAndSufix(input: string): BraceLocationInfo {
20
- const prefix: number[] = [];
21
- const suffix: number[] = [];
22
- const braceCounter: Map<number, number> = new Map();
23
-
24
- for (let i = 0; i < input.length; i++) {
25
- if (input[i] == LBRACE) {
26
- prefix.push(i);
27
- braceCounter.set(i, prefix.length - suffix.length);
28
- } else if (input[i] == RBRACE) {
29
- suffix.push(i);
30
- braceCounter.set(i, prefix.length - suffix.length);
31
- }
32
- }
33
-
34
- return {
35
- prefix,
36
- suffix,
37
- };
38
- }
39
-
40
- function queryIntervalSync(
41
- tree: IntervalTreeType,
42
- low: number,
43
- high: number,
44
- ): boolean {
45
- let intervalExists: boolean = false;
46
- tree.queryInterval(low, high, (interval: [number, number]) => {
47
- const [left, right] = interval;
48
- if (left < low && high < right) {
49
- intervalExists = true;
50
- return;
51
- }
52
- });
53
- return intervalExists;
54
- }
55
-
56
16
  /**
57
- * Coarse pre-check to filter out invalid json candidates. Short circuits if >1 json candidates exist in slice
58
- * @param input
59
- * @param left
60
- * @param right
61
- * @returns
17
+ * Extracts json objects from a given input string
18
+ * @param input input string
19
+ * @param limit Sets pre-check behavior. If set to 'log2', method will terminate pre-check after reaching log2(n) characters. Useful for extremely large malformed data i.e. many {} + non-json text
20
+ * Else, will do a O(n) pre-check scan to coarsely validate brace matches. Useful for many json objects (i.e. early termination)
21
+ * @returns array of JSON objects
62
22
  */
63
- function isBalancedWithOneJson(
64
- input: string,
65
- left: number,
66
- right: number,
67
- limit: Limit,
68
- ): boolean {
69
- const terminationThreshold = generateLimit(input, left, limit);
70
- let braceCount = 0;
71
- let inString = false;
72
- let escapeNext = false;
73
- let firstJsonObj = true;
74
-
75
- for (let i = left; i <= right; i++) {
76
- if (i >= terminationThreshold) {
77
- return true;
78
- }
79
- const char = input[i];
80
-
81
- if (escapeNext) {
82
- escapeNext = false;
83
- continue;
23
+ export function extractJsons<T = unknown>(
24
+ input: string,
25
+ limit: Limit = "none",
26
+ ): T[] {
27
+ if (!input.length) {
28
+ return [];
84
29
  }
85
30
 
86
- if (char === "\\") {
87
- escapeNext = true;
88
- continue;
31
+ const locations = generateBracesPrefixAndSufix(input);
32
+ if (!locations.prefix.length || !locations.suffix.length) {
33
+ return [];
89
34
  }
90
35
 
91
- if (char === '"') {
92
- inString = !inString;
93
- continue;
94
- }
95
-
96
- if (inString) continue;
97
-
98
- if (char === LBRACE) {
99
- braceCount++;
100
- } else if (char === RBRACE) {
101
- braceCount--;
102
- }
36
+ return findValidJsons<T>(locations, input, limit);
37
+ }
103
38
 
104
- if (braceCount < 0) {
105
- return false;
39
+ function setMapValue(
40
+ memo: Map<number, Set<number>>,
41
+ leftIndex: number,
42
+ rightIndex: number,
43
+ ): boolean {
44
+ let set = memo.get(leftIndex);
45
+ if (!set) {
46
+ set = new Set<number>();
47
+ memo.set(leftIndex, set);
106
48
  }
107
-
108
- if (braceCount === 0) {
109
- if (!firstJsonObj) {
49
+ if (set.has(rightIndex)) {
110
50
  return false;
111
- }
112
- firstJsonObj = !firstJsonObj;
113
51
  }
114
- }
115
-
116
- return braceCount === 0;
52
+ set.add(rightIndex);
53
+ return true;
117
54
  }
118
55
 
119
- function findValidJsons(
120
- { prefix, suffix }: BraceLocationInfo,
121
- input: string,
122
- limit: Limit,
123
- ): object[] {
124
- const tree = IntervalTree();
125
- const startingPosition: MemoPosition = {
126
- left: 0,
127
- right: suffix.length - 1,
128
- };
129
- const queue: Queue<MemoPosition> = new Queue(startingPosition);
130
- const memo = new Set<string>([convertMemoPositionToKey(startingPosition)]);
131
- const jsons: object[] = [];
132
-
133
- while (queue.length()) {
134
- const { left: leftIndex, right: rightIndex } = queue.dequeue()!;
135
- const leftPosition: number = prefix[leftIndex]!;
136
- const rightPosition: number = suffix[rightIndex]!;
137
-
138
- if (
139
- rightPosition < leftPosition ||
140
- queryIntervalSync(tree, leftPosition, rightPosition)
141
- ) {
142
- continue;
56
+ function findValidJsons<T = unknown>(
57
+ { prefix, suffix }: BraceLocationInfo,
58
+ input: string,
59
+ limit: Limit,
60
+ ): T[] {
61
+ const tree = IntervalTree();
62
+ const queue = new Denque<MemoPosition>([[0, suffix.length - 1]]);
63
+ const memo = new Map<number, Set<number>>([
64
+ [0, new Set([suffix.length - 1])],
65
+ ]);
66
+ const jsons: T[] = [];
67
+
68
+ while (!queue.isEmpty()) {
69
+ const [leftIndex, rightIndex] = queue.shift()!;
70
+ const leftPosition: number = prefix[leftIndex]!;
71
+ const rightPosition: number = suffix[rightIndex]!;
72
+
73
+ if (
74
+ rightPosition < leftPosition ||
75
+ queryIntervalSync(tree, leftPosition, rightPosition)
76
+ ) {
77
+ continue;
78
+ }
79
+
80
+ const set = memo.get(leftIndex)!;
81
+ set.delete(rightIndex);
82
+
83
+ try {
84
+ if (
85
+ isBalancedWithOneJson(input, leftPosition, rightPosition, limit)
86
+ ) {
87
+ jsons.push(
88
+ JSON.parse(input.slice(leftPosition, rightPosition + 1)),
89
+ );
90
+ tree.insert([leftPosition, rightPosition]);
91
+ continue;
92
+ }
93
+ } catch (error) {
94
+ if (!(error instanceof SyntaxError)) {
95
+ throw error as JsonExtractError;
96
+ }
97
+ }
98
+ if (
99
+ rightIndex - 1 >= 0 &&
100
+ setMapValue(memo, leftIndex, rightIndex - 1)
101
+ ) {
102
+ queue.push([leftIndex, rightIndex - 1]);
103
+ }
104
+
105
+ if (
106
+ leftIndex + 1 < prefix.length &&
107
+ setMapValue(memo, leftIndex + 1, rightIndex)
108
+ ) {
109
+ queue.push([leftIndex + 1, rightIndex]);
110
+ }
111
+
112
+ if (!set.size) {
113
+ memo.delete(leftIndex);
114
+ }
143
115
  }
116
+ return jsons;
117
+ }
144
118
 
145
- try {
146
- if (isBalancedWithOneJson(input, leftPosition, rightPosition, limit)) {
147
- jsons.push(JSON.parse(input.slice(leftPosition, rightPosition + 1)));
148
- tree.insert([leftPosition, rightPosition]);
149
- continue;
150
- }
151
- } catch (error) {
152
- if (!(error instanceof SyntaxError)) {
153
- throw error as JsonExtractError;
154
- }
119
+ function generateBracesPrefixAndSufix(input: string): BraceLocationInfo {
120
+ const prefix: number[] = [];
121
+ const suffix: number[] = [];
122
+
123
+ for (let i = 0; i < input.length; i++) {
124
+ if (input[i] == LBRACE) {
125
+ prefix.push(i);
126
+ } else if (input[i] == RBRACE) {
127
+ suffix.push(i);
128
+ }
155
129
  }
156
130
 
157
- const positions: MemoPosition[] = [
158
- {
159
- left: leftIndex,
160
- right: rightIndex - 1 >= 0 ? rightIndex - 1 : rightIndex,
161
- },
162
- {
163
- left: leftIndex + 1 < prefix.length ? leftIndex + 1 : leftIndex,
164
- right: rightIndex,
165
- },
166
- ];
167
-
168
- for (const position of positions) {
169
- const key = convertMemoPositionToKey(position);
170
- if (!memo.has(key)) {
171
- queue.enqueue(position);
172
- memo.add(key);
173
- }
174
- }
175
- }
176
- return jsons;
131
+ return {
132
+ prefix,
133
+ suffix,
134
+ };
177
135
  }
178
136
 
179
137
  function generateLimit(input: string, left: number, limit: Limit) {
180
- switch (limit) {
181
- case "log2":
182
- return left + Math.ceil(Math.log2(input.length));
183
- case "none":
184
- return input.length;
185
- default:
186
- throw new JsonExtractError("unknown limit type provided");
187
- }
138
+ switch (limit) {
139
+ case "log2":
140
+ return (
141
+ left +
142
+ Math.min(input.length, Math.ceil(Math.log2(input.length)))
143
+ );
144
+ case "none":
145
+ return input.length;
146
+ default:
147
+ throw new JsonExtractError("unknown limit type provided");
148
+ }
188
149
  }
189
150
 
190
151
  /**
191
- * Extracts json objects from a given input string
192
- * @param input input string
193
- * @param limit Sets pre-check behavior. If set to 'log2', method will terminate pre-check after reaching log2(n) characters. Useful for large malformed data i.e. many {} + non-json text
194
- * Else, will do a O(n) scan to coarsely validate brace matches. Useful for many json objects (i.e. early termination)
195
- * @returns array of JSON objects
152
+ * Coarse pre-check to filter out invalid json candidates. Short circuits if >1 json candidates exist in slice
153
+ * @param input
154
+ * @param left
155
+ * @param right
156
+ * @returns
196
157
  */
197
- export function extractJsons(input: string, limit: Limit = "none"): object[] {
198
- if (!input?.length) {
199
- return [];
200
- }
158
+ function isBalancedWithOneJson(
159
+ input: string,
160
+ left: number,
161
+ right: number,
162
+ limit: Limit,
163
+ ): boolean {
164
+ const terminationThreshold = generateLimit(input, left, limit);
165
+ let braceCount = 0;
166
+ let inString = false;
167
+ let escapeNext = false;
168
+ let firstJsonObj = true;
169
+
170
+ for (let i = left; i <= right; i++) {
171
+ if (i >= terminationThreshold) {
172
+ return true;
173
+ }
174
+
175
+ const char = input[i];
176
+
177
+ if (escapeNext) {
178
+ escapeNext = false;
179
+ continue;
180
+ }
181
+
182
+ if (char === "\\") {
183
+ escapeNext = true;
184
+ continue;
185
+ }
186
+
187
+ if (char === '"') {
188
+ inString = !inString;
189
+ continue;
190
+ }
191
+
192
+ if (inString) continue;
193
+
194
+ if (char === LBRACE) {
195
+ braceCount++;
196
+ } else if (char === RBRACE) {
197
+ braceCount--;
198
+ }
199
+
200
+ if (braceCount < 0) {
201
+ return false;
202
+ }
203
+
204
+ if (braceCount === 0) {
205
+ if (!firstJsonObj) {
206
+ return false;
207
+ }
208
+ firstJsonObj = !firstJsonObj;
209
+ }
210
+ }
201
211
 
202
- const locations = generateBracesPrefixAndSufix(input);
203
- if (!locations.prefix.length || !locations.suffix.length) {
204
- return [];
205
- }
212
+ return braceCount === 0;
213
+ }
206
214
 
207
- return findValidJsons(locations, input, limit);
215
+ function queryIntervalSync(
216
+ tree: IntervalTreeType,
217
+ low: number,
218
+ high: number,
219
+ ): boolean {
220
+ let intervalExists = false;
221
+ tree.queryInterval(low, high, ([left, right]: [number, number]) => {
222
+ if (left < low && high < right) {
223
+ intervalExists = true;
224
+ return;
225
+ }
226
+ });
227
+ return intervalExists;
208
228
  }
package/src/interfaces.ts CHANGED
@@ -1,37 +1,37 @@
1
- export type Interval = [number, number];
2
-
3
- interface IntervalTreeNode {
4
- mid: number;
5
- left: IntervalTreeNode | null;
6
- right: IntervalTreeNode | null;
7
- leftPoints: Interval[];
8
- rightPoints: Interval[];
9
- count: number;
10
- intervals(result?: Interval[]): Interval[];
11
- insert(interval: Interval): void;
12
- remove(interval: Interval): number;
13
- queryPoint(x: number, cb: (interval: Interval) => any): any;
14
- queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
1
+ export interface BraceLocationInfo {
2
+ readonly prefix: number[];
3
+ readonly suffix: number[];
15
4
  }
16
5
 
6
+ export type Interval = [number, number];
7
+
17
8
  export interface IntervalTreeType {
18
- root: IntervalTreeNode | null;
19
- insert(interval: Interval): void;
20
- remove(interval: Interval): boolean;
21
- queryPoint(x: number, cb: (interval: Interval) => any): any;
22
- queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
23
- readonly count: number;
24
- readonly intervals: Interval[];
9
+ readonly count: number;
10
+ insert(interval: Interval): void;
11
+ readonly intervals: Interval[];
12
+ queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
13
+ queryPoint(x: number, cb: (interval: Interval) => any): any;
14
+ remove(interval: Interval): boolean;
15
+ root: IntervalTreeNode | null;
25
16
  }
26
17
 
27
- export interface MemoPosition {
28
- readonly left: number;
29
- readonly right: number;
30
- }
18
+ export type Limit = "log2" | "none";
31
19
 
32
- export interface BraceLocationInfo {
33
- readonly prefix: number[];
34
- readonly suffix: number[];
35
- }
20
+ /**
21
+ * [left, right]: [number, number]
22
+ */
23
+ export type MemoPosition = [number, number];
36
24
 
37
- export type Limit = "log2" | "none";
25
+ interface IntervalTreeNode {
26
+ count: number;
27
+ insert(interval: Interval): void;
28
+ intervals(result?: Interval[]): Interval[];
29
+ left: IntervalTreeNode | null;
30
+ leftPoints: Interval[];
31
+ mid: number;
32
+ queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
33
+ queryPoint(x: number, cb: (interval: Interval) => any): any;
34
+ remove(interval: Interval): number;
35
+ right: IntervalTreeNode | null;
36
+ rightPoints: Interval[];
37
+ }
@@ -1,31 +1,39 @@
1
1
  declare module "interval-tree-1d" {
2
- type Interval = [number, number];
2
+ type Interval = [number, number];
3
3
 
4
- interface IntervalTreeNode {
5
- mid: number;
6
- left: IntervalTreeNode | null;
7
- right: IntervalTreeNode | null;
8
- leftPoints: Interval[];
9
- rightPoints: Interval[];
10
- count: number;
11
- intervals(result?: Interval[]): Interval[];
12
- insert(interval: Interval): void;
13
- remove(interval: Interval): number;
14
- queryPoint(x: number, cb: (interval: Interval) => any): any;
15
- queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
16
- }
4
+ interface IntervalTree {
5
+ readonly count: number;
6
+ insert(interval: Interval): void;
7
+ readonly intervals: Interval[];
8
+ queryInterval(
9
+ lo: number,
10
+ hi: number,
11
+ cb: (interval: Interval) => any,
12
+ ): any;
13
+ queryPoint(x: number, cb: (interval: Interval) => any): any;
14
+ remove(interval: Interval): boolean;
15
+ root: IntervalTreeNode | null;
16
+ }
17
17
 
18
- interface IntervalTree {
19
- root: IntervalTreeNode | null;
20
- insert(interval: Interval): void;
21
- remove(interval: Interval): boolean;
22
- queryPoint(x: number, cb: (interval: Interval) => any): any;
23
- queryInterval(lo: number, hi: number, cb: (interval: Interval) => any): any;
24
- readonly count: number;
25
- readonly intervals: Interval[];
26
- }
18
+ interface IntervalTreeNode {
19
+ count: number;
20
+ insert(interval: Interval): void;
21
+ intervals(result?: Interval[]): Interval[];
22
+ left: IntervalTreeNode | null;
23
+ leftPoints: Interval[];
24
+ mid: number;
25
+ queryInterval(
26
+ lo: number,
27
+ hi: number,
28
+ cb: (interval: Interval) => any,
29
+ ): any;
30
+ queryPoint(x: number, cb: (interval: Interval) => any): any;
31
+ remove(interval: Interval): number;
32
+ right: IntervalTreeNode | null;
33
+ rightPoints: Interval[];
34
+ }
27
35
 
28
- function createWrapper(intervals?: Interval[]): IntervalTree;
36
+ function createWrapper(intervals?: Interval[]): IntervalTree;
29
37
 
30
- export = createWrapper;
38
+ export = createWrapper;
31
39
  }
package/tsconfig.json CHANGED
@@ -49,6 +49,6 @@
49
49
  ],
50
50
  "exclude": [
51
51
  "node_modules",
52
- "src/**/__tests__"
52
+ "src/**/__tests__",
53
53
  ]
54
54
  }
package/dist/queue.d.ts DELETED
@@ -1,10 +0,0 @@
1
- export declare class Queue<T> {
2
- private popQueue;
3
- private pushQueue;
4
- constructor(...value: T[]);
5
- private migratePushToPopQueue;
6
- enqueue(...value: T[]): void;
7
- dequeue(): T | null;
8
- length(): number;
9
- }
10
- //# sourceMappingURL=queue.d.ts.map
package/dist/queue.d.ts.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"file":"queue.d.ts","sourceRoot":"","sources":["../src/queue.ts"],"names":[],"mappings":"AAAA,qBAAa,KAAK,CAAC,CAAC;IAClB,OAAO,CAAC,QAAQ,CAAM;IACtB,OAAO,CAAC,SAAS,CAAM;gBAEX,GAAG,KAAK,EAAE,CAAC,EAAE;IAKzB,OAAO,CAAC,qBAAqB;IAMtB,OAAO,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,GAAG,IAAI;IAI5B,OAAO,IAAI,CAAC,GAAG,IAAI;IAYnB,MAAM,IAAI,MAAM;CAGxB"}
package/dist/queue.js DELETED
@@ -1,35 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Queue = void 0;
4
- class Queue {
5
- popQueue;
6
- pushQueue;
7
- constructor(...value) {
8
- this.popQueue = [];
9
- this.pushQueue = value;
10
- }
11
- migratePushToPopQueue() {
12
- while (this.pushQueue.length) {
13
- this.popQueue.push(this.pushQueue.pop());
14
- }
15
- }
16
- enqueue(...value) {
17
- this.pushQueue.push(...value);
18
- }
19
- dequeue() {
20
- if (!this.popQueue.length) {
21
- this.migratePushToPopQueue();
22
- }
23
- if (!this.popQueue.length) {
24
- return null;
25
- }
26
- else {
27
- return this.popQueue.pop();
28
- }
29
- }
30
- length() {
31
- return this.popQueue.length + this.pushQueue.length;
32
- }
33
- }
34
- exports.Queue = Queue;
35
- //# sourceMappingURL=queue.js.map
package/dist/queue.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"file":"queue.js","sourceRoot":"","sources":["../src/queue.ts"],"names":[],"mappings":";;;AAAA,MAAa,KAAK;IACR,QAAQ,CAAM;IACd,SAAS,CAAM;IAEvB,YAAY,GAAG,KAAU;QACvB,IAAI,CAAC,QAAQ,GAAG,EAAE,CAAC;QACnB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;IACzB,CAAC;IAEO,qBAAqB;QAC3B,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;YAC7B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,EAAG,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAEM,OAAO,CAAC,GAAG,KAAU;QAC1B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;IAChC,CAAC;IAEM,OAAO;QACZ,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC1B,IAAI,CAAC,qBAAqB,EAAE,CAAC;QAC/B,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAG,CAAC;QAC9B,CAAC;IACH,CAAC;IAEM,MAAM;QACX,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;IACtD,CAAC;CACF;AAlCD,sBAkCC"}
package/src/queue.ts DELETED
@@ -1,35 +0,0 @@
1
- export class Queue<T> {
2
- private popQueue: T[];
3
- private pushQueue: T[];
4
-
5
- constructor(...value: T[]) {
6
- this.popQueue = [];
7
- this.pushQueue = value;
8
- }
9
-
10
- private migratePushToPopQueue(): void {
11
- while (this.pushQueue.length) {
12
- this.popQueue.push(this.pushQueue.pop()!);
13
- }
14
- }
15
-
16
- public enqueue(...value: T[]): void {
17
- this.pushQueue.push(...value);
18
- }
19
-
20
- public dequeue(): T | null {
21
- if (!this.popQueue.length) {
22
- this.migratePushToPopQueue();
23
- }
24
-
25
- if (!this.popQueue.length) {
26
- return null;
27
- } else {
28
- return this.popQueue.pop()!;
29
- }
30
- }
31
-
32
- public length(): number {
33
- return this.popQueue.length + this.pushQueue.length;
34
- }
35
- }