full-json-extractor 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.vscode/launch.json +21 -0
- package/README.md +21 -2
- package/dist/benchmark.js.map +1 -1
- package/dist/extractor.d.ts +4 -4
- package/dist/extractor.d.ts.map +1 -1
- package/dist/extractor.js +91 -99
- package/dist/extractor.js.map +1 -1
- package/dist/interfaces.d.ts +25 -25
- package/dist/interfaces.d.ts.map +1 -1
- package/eslint.config.js +39 -0
- package/package.json +49 -37
- package/src/__tests__/json-parser.test.ts +167 -167
- package/src/benchmark.ts +36 -35
- package/src/extractor.ts +194 -174
- package/src/interfaces.ts +30 -30
- package/src/types/interval-tree-1d.d.ts +33 -25
- package/tsconfig.json +1 -1
- package/dist/queue.d.ts +0 -10
- package/dist/queue.d.ts.map +0 -1
- package/dist/queue.js +0 -35
- package/dist/queue.js.map +0 -1
- package/src/queue.ts +0 -35
|
@@ -1,171 +1,171 @@
|
|
|
1
1
|
import { extractJsons } from "../extractor";
|
|
2
2
|
|
|
3
3
|
describe("extractJsonIntervals.test", () => {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
4
|
+
// The whole input is a single valid object with one nested level; the first
// extracted value must deep-equal that object.
it("only json, valid json, 1-depth", () => {
  const input = `{"sample_id": 1, "data": { "key": "xdsc" }}`;
  const expectedFirst = {
    sample_id: 1,
    data: { key: "xdsc" },
  };

  const extracted = extractJsons(input);

  expect(extracted[0]).toEqual(expectedFirst);
});
|
|
17
|
+
|
|
18
|
+
// The only candidate uses an unquoted key, so it is not valid JSON and the
// result is expected to be an empty array.
it("only json, invalid json, 0-depth", () => {
  const extracted = extractJsons("{key: 1}");

  expect(extracted).toEqual([]);
});
|
|
22
|
+
|
|
23
|
+
// Free text followed by a nested object whose innermost member has an
// unquoted key; the test expects nothing to be extracted at any level.
it("raw string, invalid json, 2-depth", () => {
  const input = '[hi] { "outer": { "inner": { key: 1 } } }';

  const extracted = extractJsons(input);

  expect(extracted).toEqual([]);
});
|
|
27
|
+
|
|
28
|
+
// A valid object preceded by non-JSON text ("[hi] "); the first extracted
// value must deep-equal the embedded object.
it("raw string, valid json, 1-depth", () => {
  const input = `[hi] {"sample_id": 1, "data": { "key": "xdsc" }}`;
  const expectedFirst = {
    sample_id: 1,
    data: { key: "xdsc" },
  };

  const extracted = extractJsons(input);

  expect(extracted[0]).toEqual(expectedFirst);
});
|
|
41
|
+
|
|
42
|
+
// The object is wrapped in double quotes after the "[hi] " prefix; the first
// extracted value must deep-equal the embedded object.
// NOTE(review): inside a template literal `\"` evaluates to a plain `"`, so the
// input contains no backslashes at runtime. If literal backslash-escaped quotes
// were intended, `String.raw` or `\\"` would be needed — confirm the intent.
it("raw string, valid escaped json, 1-depth", () => {
  const input = `[hi] "{\"sample_id\": 1, \"data\": { \"key\": \"xdsc\" }}"`;
  const expectedFirst = {
    sample_id: 1,
    data: { key: "xdsc" },
  };

  const extracted = extractJsons(input);

  expect(extracted[0]).toEqual(expectedFirst);
});
|
|
55
|
+
|
|
56
|
+
// Three space-separated objects after a text prefix; each of them must appear
// somewhere in the extracted list.
it("raw string, valid json, 1-depth, 3 objects", () => {
  const input = `[hi] {"sample_id": 1, "data": { "key": "xdsc" }} {"sample_id": 2, "data": { "key": "xdsc" }} {"sample_id": 3, "data": { "key": "xdsc" }}`;
  const expectedObjects = [1, 2, 3].map((sample_id) => ({
    sample_id,
    data: { key: "xdsc" },
  }));

  const extracted = extractJsons(input);

  expectedObjects.forEach((expected) => {
    expect(extracted).toContainEqual(expected);
  });
});
|
|
84
|
+
|
|
85
|
+
// Same three objects as the plain "3 objects" case, but a stray `{` sits
// directly in front of the first one; all three must still be extracted.
it("raw string, valid json, 1-depth, 3 objects, extra {", () => {
  const input = `[hi] {{"sample_id": 1, "data": { "key": "xdsc" }} {"sample_id": 2, "data": { "key": "xdsc" }} {"sample_id": 3, "data": { "key": "xdsc" }}`;
  const expectedObjects = [1, 2, 3].map((sample_id) => ({
    sample_id,
    data: { key: "xdsc" },
  }));

  const extracted = extractJsons(input);

  expectedObjects.forEach((expected) => {
    expect(extracted).toContainEqual(expected);
  });
});
|
|
113
|
+
|
|
114
|
+
// A stray leading `{` plus `}` / `{` characters inside string values: braces
// that are part of a string must not break extraction of the three objects.
it("raw string, valid json, 1-depth, 3 objects, extra {, {} in string", () => {
  const input = `[hi] {{"sample_id": 1, "data": { "key": "x}dsc" }} {"sample_id": 2, "data": { "key": "xd{sc" }} {"sample_id": 3, "data": { "key": "xdsc" }}`;
  // Keys in input order; sample ids are 1-based positions in this list.
  const keysInOrder = ["x}dsc", "xd{sc", "xdsc"];
  const expectedObjects = keysInOrder.map((key, position) => ({
    sample_id: position + 1,
    data: { key },
  }));

  const extracted = extractJsons(input);

  expectedObjects.forEach((expected) => {
    expect(extracted).toContainEqual(expected);
  });
});
|
|
142
|
+
|
|
143
|
+
// 150 chunks concatenated without a separator. Each chunk is "[hi] ", a stray
// `{`, and a two-level object whose string value contains both `}` and `{`.
// Exactly 150 values must come back, and every expected object must be present.
it("raw string, valid json, 2-depth, 150 objects, extra {, {} in string", () => {
  const OBJECT_COUNT = 150;
  const sampleIds = Array.from({ length: OBJECT_COUNT }, (_, index) => index);

  const input = sampleIds
    .map(
      (id) =>
        `[hi] {{"sample_id": ${id}, "data": { "key": "x}ds{c", "subdata": { "key": 2 }}}`,
    )
    .join("");

  const expectedObjects: object[] = sampleIds.map((id) => ({
    sample_id: id,
    data: {
      key: "x}ds{c",
      subdata: { key: 2 },
    },
  }));

  const extracted = extractJsons(input);

  expect(extracted.length).toEqual(OBJECT_COUNT);
  expectedObjects.forEach((expected) => {
    expect(extracted).toContainEqual(expected);
  });
});
|
|
171
171
|
});
|
package/src/benchmark.ts
CHANGED
|
@@ -1,59 +1,60 @@
|
|
|
1
1
|
import { performance } from "node:perf_hooks";
|
|
2
2
|
import process from "node:process";
|
|
3
|
+
|
|
3
4
|
import { extractJsons } from "./extractor.js";
|
|
4
5
|
|
|
5
6
|
/**
 * Builds a chain of nested objects for benchmark input.
 *
 * Starts from `{ value: "test" }` and wraps it in `{ nested: ... }` once per
 * level, so `depth = 2` yields `{ nested: { nested: { value: "test" } } }`.
 *
 * @param depth Number of `nested` wrappers to add; values <= 0 add none.
 * @returns The outermost object of the chain.
 */
function generateNestedJSON(depth: number): object {
  let wrapped: object = { value: "test" };
  let remaining = depth;

  while (remaining > 0) {
    wrapped = { nested: wrapped };
    remaining -= 1;
  }

  return wrapped;
}
|
|
12
13
|
|
|
13
14
|
/**
 * Produces a benchmark input string: `count` serialized JSON documents joined
 * by the filler text " some text in between ".
 *
 * Each document is the result of `generateNestedJSON(depth)` with an extra
 * `padding` property holding `sizeFactor` repetitions of "x".
 *
 * @param depth Nesting depth passed to `generateNestedJSON`.
 * @param count Number of JSON documents to embed.
 * @param sizeFactor Length of the `padding` string added to every document.
 * @returns The documents and filler text as one string.
 */
function generateRawString(
  depth: number,
  count: number,
  sizeFactor: number,
): string {
  const FILLER = " some text in between ";
  const serialized: string[] = [];

  for (let produced = 0; produced < count; produced += 1) {
    // `padding` goes last so it is serialized after the nested payload.
    const padded = {
      ...generateNestedJSON(depth),
      padding: "x".repeat(sizeFactor),
    };
    serialized.push(JSON.stringify(padded));
  }

  return serialized.join(FILLER);
}
|
|
26
27
|
|
|
27
28
|
/**
 * Reports the current `heapUsed` figure from `process.memoryUsage()`.
 *
 * @returns Heap usage in bytes divided by 1024 twice, rounded to two decimals.
 */
function getMemoryUsageMB(): number {
  const { heapUsed } = process.memoryUsage();
  const megabytes = heapUsed / 1024 / 1024;

  // Scale by 100 before rounding to keep two decimal places.
  return Math.round(megabytes * 100) / 100;
}
|
|
31
32
|
|
|
32
33
|
/**
 * Runs `extractJsons` over every combination of nesting depth, padding size
 * and document count, and logs one line per combination with the elapsed
 * time, the heap-usage delta and the length of the serialized result.
 */
async function runBenchmark() {
  const nestingDepths = [1, 3, 5, 10, 20];
  const paddingSizes = [0, 1000, 10000, 50_000]; // padding sizes
  const documentCounts = [1, 5, 10, 20]; // number of JSONs per string

  for (const depth of nestingDepths) {
    for (const size of paddingSizes) {
      for (const count of documentCounts) {
        // Build the input outside the measured window.
        const input = generateRawString(depth, count, size);

        const heapBeforeMB = getMemoryUsageMB();
        const startedAt = performance.now();

        const extracted = extractJsons(input);

        const finishedAt = performance.now();
        const heapAfterMB = getMemoryUsageMB();

        const elapsedMs = (finishedAt - startedAt).toFixed(3);
        const heapDeltaMB = (heapAfterMB - heapBeforeMB).toFixed(3);
        const outputLength = JSON.stringify(extracted).length;

        console.log(
          `Depth=${depth}, Size=${size}, Count=${count} | Time=${elapsedMs} ms | ` +
            `MemΔ=${heapDeltaMB} MB | OutputLen=${outputLength}`,
        );
      }
    }
  }
}
|
|
58
59
|
|
|
59
60
|
// Entry point: start the benchmark. The returned promise gets an explicit
// rejection handler so a failure is logged and reflected in the exit code
// rather than left as a floating promise.
runBenchmark().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|