@jackchuka/gql-ingest 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -7
- package/bin/cli.js +71 -65
- package/dist/config.d.ts.map +1 -1
- package/dist/dependency-resolver.d.ts.map +1 -1
- package/dist/graphql-client.d.ts.map +1 -1
- package/dist/mapper.d.ts +1 -1
- package/dist/mapper.d.ts.map +1 -1
- package/dist/metrics.d.ts.map +1 -1
- package/dist/readers/data-reader.d.ts.map +1 -1
- package/dist/readers/json.d.ts.map +1 -1
- package/dist/readers/jsonl.d.ts.map +1 -1
- package/dist/readers/yaml.d.ts.map +1 -1
- package/package.json +31 -25
- package/src/cli.ts +0 -187
- package/src/config.test.ts +0 -272
- package/src/config.ts +0 -125
- package/src/dependency-resolver.test.ts +0 -211
- package/src/dependency-resolver.ts +0 -102
- package/src/graphql-client.test.ts +0 -219
- package/src/graphql-client.ts +0 -151
- package/src/mapper.test.ts +0 -607
- package/src/mapper.ts +0 -489
- package/src/metrics.test.ts +0 -207
- package/src/metrics.ts +0 -161
- package/src/readers/csv.test.ts +0 -82
- package/src/readers/csv.ts +0 -29
- package/src/readers/data-reader.test.ts +0 -104
- package/src/readers/data-reader.ts +0 -61
- package/src/readers/index.ts +0 -18
- package/src/readers/json.test.ts +0 -80
- package/src/readers/json.ts +0 -27
- package/src/readers/jsonl.test.ts +0 -96
- package/src/readers/jsonl.ts +0 -28
- package/src/readers/yaml.test.ts +0 -95
- package/src/readers/yaml.ts +0 -28
package/src/metrics.ts
DELETED
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
/**
 * Counters and timestamps tracked for a single named entity.
 */
export interface EntityMetrics {
  /** Name under which the entity was registered. */
  entityName: string;
  /** Number of successes recorded for this entity. */
  successCount: number;
  /** Number of failures recorded for this entity. */
  failureCount: number;
  /** `Date.now()` value captured when processing of the entity started. */
  startTime: number;
  /** `Date.now()` value captured when the entity was finished; absent until then. */
  endTime?: number;
}
|
|
8
|
-
|
|
9
|
-
/**
 * Aggregate state of one processing run: global counters, per-entity
 * metrics, request timings and retry statistics.
 */
export interface ProcessingMetrics {
  /** Total number of recorded outcomes (successes plus failures). */
  totalEntities: number;
  /** Number of recorded successes across all entities. */
  totalSuccesses: number;
  /** Number of recorded failures across all entities. */
  totalFailures: number;
  /** Per-entity metrics keyed by entity name. */
  entityMetrics: Map<string, EntityMetrics>;
  /** Every recorded request duration (rendered as milliseconds in the summary). */
  requestDurations: number[];
  /** Sum of the attempt counts reported with retry successes and failures. */
  retryAttempts: number;
  /** Number of retry sequences reported as successful. */
  retrySuccesses: number;
  /** Number of retry sequences reported as failed. */
  retryFailures: number;
  /** `Date.now()` value captured when the run started. */
  startTime: number;
  /** `Date.now()` value captured when the run was finished; absent until then. */
  endTime?: number;
}
|
|
21
|
-
|
|
22
|
-
/**
 * Accumulates success/failure counts, request durations and retry statistics
 * for a processing run and renders them as a human-readable summary.
 *
 * Outcomes are only counted for entities that were previously registered via
 * `startEntityProcessing`; outcomes for unknown entities are ignored.
 */
export class MetricsCollector {
  private metrics: ProcessingMetrics;

  /** Starts a new run; the run's start time is the moment of construction. */
  constructor() {
    this.metrics = {
      totalEntities: 0,
      totalSuccesses: 0,
      totalFailures: 0,
      entityMetrics: new Map(),
      requestDurations: [],
      retryAttempts: 0,
      retrySuccesses: 0,
      retryFailures: 0,
      startTime: Date.now(),
    };
  }

  /**
   * Registers an entity and stamps its start time. Calling this again for an
   * already registered entity keeps the existing counters and start time.
   */
  startEntityProcessing(entityName: string): void {
    if (!this.metrics.entityMetrics.has(entityName)) {
      this.metrics.entityMetrics.set(entityName, {
        entityName,
        successCount: 0,
        failureCount: 0,
        startTime: Date.now(),
      });
    }
  }

  /** Counts one success for a registered entity; no-op for unknown entities. */
  recordSuccess(entityName: string): void {
    const entityMetric = this.metrics.entityMetrics.get(entityName);
    if (entityMetric) {
      entityMetric.successCount++;
      this.metrics.totalSuccesses++;
      this.metrics.totalEntities++;
    }
  }

  /** Counts one failure for a registered entity; no-op for unknown entities. */
  recordFailure(entityName: string): void {
    const entityMetric = this.metrics.entityMetrics.get(entityName);
    if (entityMetric) {
      entityMetric.failureCount++;
      this.metrics.totalFailures++;
      this.metrics.totalEntities++;
    }
  }

  /** Stamps the end time of a registered entity; no-op for unknown entities. */
  finishEntityProcessing(entityName: string): void {
    const entityMetric = this.metrics.entityMetrics.get(entityName);
    if (entityMetric) {
      entityMetric.endTime = Date.now();
    }
  }

  /**
   * Stamps the end time of the run.
   *
   * @returns A shallow copy of the metrics; the `entityMetrics` map and the
   *   `requestDurations` array are shared with the collector.
   */
  finishProcessing(): ProcessingMetrics {
    this.metrics.endTime = Date.now();
    return { ...this.metrics };
  }

  /** @returns The metrics of one entity, or `undefined` if it was never registered. */
  getEntityMetrics(entityName: string): EntityMetrics | undefined {
    return this.metrics.entityMetrics.get(entityName);
  }

  /** @returns The number of recorded outcomes (successes plus failures). */
  getTotalProcessed(): number {
    return this.metrics.totalEntities;
  }

  /** @returns The success percentage (0-100), or 0 when nothing was recorded. */
  getSuccessRate(): number {
    if (this.metrics.totalEntities === 0) return 0;
    return (this.metrics.totalSuccesses / this.metrics.totalEntities) * 100;
  }

  /** Stores the duration of one request. */
  recordRequestDuration(duration: number): void {
    this.metrics.requestDurations.push(duration);
  }

  /** Records a retry sequence that eventually succeeded after `attempts` attempts. */
  recordRetrySuccess(attempts: number): void {
    this.metrics.retryAttempts += attempts;
    this.metrics.retrySuccesses++;
  }

  /** Records a retry sequence that gave up after `attempts` attempts. */
  recordRetryFailure(attempts: number): void {
    this.metrics.retryAttempts += attempts;
    this.metrics.retryFailures++;
  }

  /** @returns The arithmetic mean of the recorded durations, or 0 when none exist. */
  getAverageRequestDuration(): number {
    if (this.metrics.requestDurations.length === 0) return 0;
    const sum = this.metrics.requestDurations.reduce((a, b) => a + b, 0);
    return sum / this.metrics.requestDurations.length;
  }

  /**
   * @returns Elapsed time of the run: up to the recorded end time once the run
   *   is finished, otherwise up to the current time.
   */
  getDurationMs(): number {
    // `??` rather than `||`: an end timestamp of 0 is a valid recorded value
    // and must not be mistaken for "not finished yet".
    const endTime = this.metrics.endTime ?? Date.now();
    return endTime - this.metrics.startTime;
  }

  /**
   * Renders totals, optional request/retry sections and a per-entity
   * breakdown as multi-line text.
   */
  generateSummary(): string {
    const duration = this.getDurationMs();
    const successRate = this.getSuccessRate();
    const avgRequestDuration = this.getAverageRequestDuration();

    let summary = `\n📊 Processing Summary:\n`;
    summary += ` Total Processed: ${this.metrics.totalEntities}\n`;
    summary += ` ✓ Successes: ${this.metrics.totalSuccesses}\n`;
    summary += ` ✗ Failures: ${this.metrics.totalFailures}\n`;
    summary += ` Success Rate: ${successRate.toFixed(1)}%\n`;
    summary += ` Duration: ${(duration / 1000).toFixed(2)}s\n`;

    if (this.metrics.requestDurations.length > 0) {
      summary += ` Avg Request Time: ${avgRequestDuration.toFixed(0)}ms\n`;
    }

    if (this.metrics.retryAttempts > 0) {
      summary += ` Retry Attempts: ${this.metrics.retryAttempts}\n`;
      summary += ` Retry Successes: ${this.metrics.retrySuccesses}\n`;
      summary += ` Retry Failures: ${this.metrics.retryFailures}\n`;
    }

    if (this.metrics.entityMetrics.size > 0) {
      summary += `\n📋 Per-Entity Breakdown:\n`;
      for (const [entityName, entityMetric] of this.metrics.entityMetrics) {
        const entityTotal =
          entityMetric.successCount + entityMetric.failureCount;
        const entityRate =
          entityTotal > 0 ? (entityMetric.successCount / entityTotal) * 100 : 0;
        // Entities that were never finished are reported with a duration of 0.
        const entityDuration =
          entityMetric.endTime !== undefined
            ? entityMetric.endTime - entityMetric.startTime
            : 0;

        summary += ` ${entityName}: ${entityTotal} total (${
          entityMetric.successCount
        } ✓, ${entityMetric.failureCount} ✗) - ${entityRate.toFixed(
          1
        )}% success - ${(entityDuration / 1000).toFixed(2)}s\n`;
      }
    }

    return summary;
  }
}
|
package/src/readers/csv.test.ts
DELETED
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
import fs from "fs";
|
|
2
|
-
import path from "path";
|
|
3
|
-
import { readCsvFile } from "./csv";
|
|
4
|
-
|
|
5
|
-
describe("CSV Reader", () => {
  const fixtureDir = path.join(__dirname, "test-data");
  const fixturePath = path.join(fixtureDir, "test.csv");

  // Writes the given text to the fixture file and parses it with the reader.
  const writeAndRead = (text: string) => {
    fs.writeFileSync(fixturePath, text);
    return readCsvFile(fixturePath);
  };

  // Create the scratch directory once for the whole suite.
  beforeAll(() => {
    if (fs.existsSync(fixtureDir)) return;
    fs.mkdirSync(fixtureDir, { recursive: true });
  });

  // Remove the scratch directory and anything left inside it.
  afterAll(() => {
    if (!fs.existsSync(fixtureDir)) return;
    fs.rmSync(fixtureDir, { recursive: true });
  });

  // Every test starts without a leftover fixture file.
  beforeEach(() => {
    if (!fs.existsSync(fixturePath)) return;
    fs.unlinkSync(fixturePath);
  });

  it("should read a simple CSV file", async () => {
    const rows = await writeAndRead("name,age\nJohn,30\nJane,25");

    expect(rows).toEqual([
      { name: "John", age: "30" },
      { name: "Jane", age: "25" },
    ]);
  });

  it("should read CSV with special characters", async () => {
    const rows = await writeAndRead(
      'name,description\n"John Doe","A person with, comma"\n"Jane\'s Data","Quote test"'
    );

    expect(rows).toEqual([
      { name: "John Doe", description: "A person with, comma" },
      { name: "Jane's Data", description: "Quote test" },
    ]);
  });

  it("should handle empty CSV file", async () => {
    const rows = await writeAndRead("name,age\n");

    expect(rows).toEqual([]);
  });

  it("should handle CSV with only headers", async () => {
    const rows = await writeAndRead("name,age");

    expect(rows).toEqual([]);
  });

  it("should handle CSV with missing values", async () => {
    const rows = await writeAndRead(
      "name,age,city\nJohn,30,\nJane,,Boston\n,25,NYC"
    );

    expect(rows).toEqual([
      { name: "John", age: "30", city: "" },
      { name: "Jane", age: "", city: "Boston" },
      { name: "", age: "25", city: "NYC" },
    ]);
  });
});
|
package/src/readers/csv.ts
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import fs from "fs";
|
|
2
|
-
import csv from "csv-parser";
|
|
3
|
-
import { DataReader, DataRow } from "./data-reader";
|
|
4
|
-
|
|
5
|
-
/**
 * One parsed CSV record: every value is kept as a string, keyed by name.
 */
export interface CsvRow {
  [column: string]: string;
}
|
|
8
|
-
|
|
9
|
-
/**
 * Streams a CSV file through `csv-parser` and collects every emitted row.
 *
 * @param filePath Path of the CSV file to read.
 * @returns All parsed rows, in the order the parser emitted them.
 * @throws Rejects with the underlying error when the file cannot be read or
 *   when the parser reports an error.
 */
export async function readCsvFile(filePath: string): Promise<CsvRow[]> {
  return new Promise((resolve, reject) => {
    const results: CsvRow[] = [];
    const source = fs.createReadStream(filePath);
    const parser = csv();

    // `pipe()` does not forward errors from the source to the destination, so
    // read failures (e.g. a missing file) need their own listener; without it
    // the error is unhandled and this promise would never settle.
    source.on("error", (error) => {
      parser.destroy();
      reject(error);
    });

    source
      .pipe(parser)
      .on("data", (data) => results.push(data))
      .on("end", () => resolve(results))
      .on("error", (error) => {
        // Stop reading and release the file descriptor once parsing failed.
        source.destroy();
        reject(error);
      });
  });
}
|
|
20
|
-
|
|
21
|
-
/**
 * `DataReader` implementation for `.csv` files; parsing is delegated to
 * `readCsvFile`.
 */
export class CsvReader extends DataReader {
  /** @returns The single extension handled by this reader. */
  getSupportedExtensions(): string[] {
    const extensions: string[] = ["csv"];
    return extensions;
  }

  /**
   * @param filePath Path of the CSV file to read.
   * @returns The rows produced by `readCsvFile`.
   */
  async readFile(filePath: string): Promise<DataRow[]> {
    const rows = await readCsvFile(filePath);
    return rows;
  }
}
|
|
package/src/readers/data-reader.test.ts
DELETED
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
import { DataReader, DataReaderFactory } from "./data-reader";
|
|
2
|
-
|
|
3
|
-
/**
 * Minimal concrete reader used by the suites below: it claims the `test` and
 * `tst` extensions and always yields one fixed row.
 */
class TestReader extends DataReader {
  getSupportedExtensions(): string[] {
    const extensions = ["test", "tst"];
    return extensions;
  }

  // The path is ignored; the same single row is returned for every file.
  async readFile(filePath: string): Promise<any[]> {
    const row = { test: true };
    return [row];
  }
}
|
|
12
|
-
|
|
13
|
-
describe("DataReader", () => {
  describe("canHandle", () => {
    it("should check if reader can handle file based on extension", () => {
      const subject = new TestReader();

      // Each pair is [file path, whether the reader is expected to accept it].
      const expectations: Array<[string, boolean]> = [
        ["file.test", true],
        ["file.tst", true],
        ["path/to/file.test", true],
        ["file.other", false],
        ["file", false],
      ];

      for (const [candidate, accepted] of expectations) {
        expect(subject.canHandle(candidate)).toBe(accepted);
      }
    });
  });
});
|
|
26
|
-
|
|
27
|
-
describe("DataReaderFactory", () => {
  // Registers a fresh TestReader with the factory.
  const registerTestReader = (): void => {
    DataReaderFactory.registerReader(new TestReader());
  };

  beforeEach(() => {
    // The registry is static, so it is reset to keep tests independent.
    (DataReaderFactory as any).readers = [];
  });

  describe("registerReader", () => {
    it("should register a reader", () => {
      registerTestReader();

      for (const extension of ["test", "tst"]) {
        expect(DataReaderFactory.getSupportedFormats()).toContain(extension);
      }
    });
  });

  describe("getReader", () => {
    beforeEach(registerTestReader);

    it("should get reader by file extension", () => {
      expect(DataReaderFactory.getReader("file.test")).toBeInstanceOf(
        TestReader
      );
    });

    it("should get reader by format override", () => {
      expect(DataReaderFactory.getReader("file.other", "test")).toBeInstanceOf(
        TestReader
      );
    });

    it("should prioritize format override over file extension", () => {
      expect(DataReaderFactory.getReader("file.unknown", "tst")).toBeInstanceOf(
        TestReader
      );
    });

    it("should throw error when no reader found", () => {
      const lookup = () => DataReaderFactory.getReader("file.unknown");

      expect(lookup).toThrow("No reader found for file: file.unknown");
    });

    it("should throw error when format specified but no reader found", () => {
      const lookup = () => DataReaderFactory.getReader("file.txt", "unknown");

      expect(lookup).toThrow(
        "No reader found for file: file.txt with format: unknown"
      );
    });
  });

  describe("getSupportedFormats", () => {
    it("should return empty array when no readers registered", () => {
      expect(DataReaderFactory.getSupportedFormats()).toEqual([]);
    });

    it("should return all supported formats", () => {
      registerTestReader();

      const supported = DataReaderFactory.getSupportedFormats();
      expect(supported).toContain("test");
      expect(supported).toContain("tst");
      expect(supported.length).toBe(2);
    });

    it("should not duplicate formats", () => {
      // Two readers advertising the same extensions must not double the list.
      registerTestReader();
      registerTestReader();

      const supported = DataReaderFactory.getSupportedFormats();
      expect(supported).toContain("test");
      expect(supported).toContain("tst");
      expect(supported.length).toBe(2);
    });
  });
});
|
|
package/src/readers/data-reader.ts
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
/**
 * One record produced by a reader: an object with arbitrary string keys and
 * untyped values.
 */
export interface DataRow {
  [field: string]: any;
}
|
|
4
|
-
|
|
5
|
-
/**
 * Base class for file readers. Subclasses declare which file extensions they
 * support and how a file is turned into rows.
 */
export abstract class DataReader {
  /**
   * Read the file at `filePath` and return its rows.
   */
  abstract readFile(filePath: string): Promise<DataRow[]>;

  /**
   * Get the supported file extensions for this reader
   * (lower-case, without the leading dot — `canHandle` lower-cases the file's
   * extension before comparing).
   */
  abstract getSupportedExtensions(): string[];

  /**
   * Check if this reader can handle the given file.
   *
   * The extension is the text after the last dot of the file name (the final
   * path segment), compared case-insensitively. A file name without a dot, or
   * one ending in a dot, has no extension and is never handled.
   */
  canHandle(filePath: string): boolean {
    // Only look at the final path segment so a dot in a directory name is not
    // mistaken for an extension separator.
    const lastSeparator = Math.max(
      filePath.lastIndexOf("/"),
      filePath.lastIndexOf("\\")
    );
    const fileName = filePath.slice(lastSeparator + 1);

    // No dot means no extension; previously the whole name was treated as the
    // extension, so a file literally named "csv" matched a CSV reader.
    const dotIndex = fileName.lastIndexOf(".");
    if (dotIndex === -1) return false;

    const extension = fileName.slice(dotIndex + 1).toLowerCase();
    return extension
      ? this.getSupportedExtensions().includes(extension)
      : false;
  }
}
|
|
23
|
-
|
|
24
|
-
/**
 * Static registry of readers with lookup by explicit format or by file
 * extension.
 */
export class DataReaderFactory {
  private static readers: DataReader[] = [];

  /** Appends a reader to the registry; earlier registrations win on lookup. */
  static registerReader(reader: DataReader): void {
    this.readers.push(reader);
  }

  /**
   * Finds the reader for a file.
   *
   * @param filePath Path of the file to be read.
   * @param format Optional format name, matched case-insensitively against the
   *   readers' supported extensions before the file extension is considered.
   * @returns The first reader matching the format, else the first reader whose
   *   `canHandle` accepts the path.
   * @throws Error when neither lookup finds a reader.
   */
  static getReader(filePath: string, format?: string): DataReader {
    // An explicit format takes precedence over the file's own extension.
    if (format) {
      const wanted = format.toLowerCase();
      for (const candidate of this.readers) {
        if (candidate.getSupportedExtensions().includes(wanted)) {
          return candidate;
        }
      }
    }

    // No format given, or nothing matched it: fall back to the extension.
    for (const candidate of this.readers) {
      if (candidate.canHandle(filePath)) {
        return candidate;
      }
    }

    const formatSuffix = format ? ` with format: ${format}` : "";
    throw new Error(`No reader found for file: ${filePath}${formatSuffix}`);
  }

  /**
   * @returns Every extension supported by a registered reader, without
   *   duplicates, in order of first registration.
   */
  static getSupportedFormats(): string[] {
    const seen = new Set<string>();
    for (const registered of this.readers) {
      for (const extension of registered.getSupportedExtensions()) {
        seen.add(extension);
      }
    }
    return [...seen];
  }
}
|
package/src/readers/index.ts
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
export { DataReader, DataRow, DataReaderFactory } from "./data-reader";
|
|
2
|
-
export { CsvReader, readCsvFile, CsvRow } from "./csv";
|
|
3
|
-
export { JsonReader } from "./json";
|
|
4
|
-
export { YamlReader } from "./yaml";
|
|
5
|
-
export { JsonlReader } from "./jsonl";
|
|
6
|
-
|
|
7
|
-
// Register all readers
|
|
8
|
-
import { DataReaderFactory } from "./data-reader";
|
|
9
|
-
import { CsvReader } from "./csv";
|
|
10
|
-
import { JsonReader } from "./json";
|
|
11
|
-
import { YamlReader } from "./yaml";
|
|
12
|
-
import { JsonlReader } from "./jsonl";
|
|
13
|
-
|
|
14
|
-
// Register readers on module load
|
|
15
|
-
// Instantiate and register each built-in reader, one after another, in the
// order CSV, JSON, YAML, JSONL (lookup returns the first matching reader).
for (const BuiltInReader of [CsvReader, JsonReader, YamlReader, JsonlReader]) {
  DataReaderFactory.registerReader(new BuiltInReader());
}
|
package/src/readers/json.test.ts
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
import fs from "fs/promises";
|
|
2
|
-
import { JsonReader } from "./json";
|
|
3
|
-
|
|
4
|
-
jest.mock("fs/promises");

describe("JsonReader", () => {
  const mockFs = fs as jest.Mocked<typeof fs>;
  let reader: JsonReader;

  // Makes the mocked `fs.readFile` resolve with the given file content.
  const stubFileContent = (text: string): void => {
    mockFs.readFile.mockResolvedValue(text);
  };

  beforeEach(() => {
    reader = new JsonReader();
    jest.clearAllMocks();
  });

  describe("getSupportedExtensions", () => {
    it("should return json as supported extension", () => {
      const extensions = reader.getSupportedExtensions();

      expect(extensions).toEqual(["json"]);
    });
  });

  describe("canHandle", () => {
    it("should return true for .json files", () => {
      // The last entry checks that matching is case insensitive.
      for (const file of ["data.json", "path/to/file.json", "file.JSON"]) {
        expect(reader.canHandle(file)).toBe(true);
      }
    });

    it("should return false for non-json files", () => {
      for (const file of ["data.csv", "data.yaml", "data"]) {
        expect(reader.canHandle(file)).toBe(false);
      }
    });
  });

  describe("readFile", () => {
    it("should read and parse JSON array", async () => {
      const items = [
        { id: 1, name: "Item 1" },
        { id: 2, name: "Item 2" },
      ];
      stubFileContent(JSON.stringify(items));

      const rows = await reader.readFile("data.json");

      expect(mockFs.readFile).toHaveBeenCalledWith("data.json", "utf8");
      expect(rows).toEqual(items);
    });

    it("should wrap single object in array", async () => {
      const item = { id: 1, name: "Item 1" };
      stubFileContent(JSON.stringify(item));

      const rows = await reader.readFile("data.json");

      expect(rows).toEqual([item]);
    });

    it("should throw error for invalid JSON", async () => {
      stubFileContent("invalid json");

      await expect(reader.readFile("data.json")).rejects.toThrow();
    });

    it("should throw error for null data", async () => {
      stubFileContent("null");

      await expect(reader.readFile("data.json")).rejects.toThrow(
        "Invalid JSON data structure in file: data.json. Expected array or object."
      );
    });

    it("should throw error for primitive values", async () => {
      stubFileContent('"string value"');

      await expect(reader.readFile("data.json")).rejects.toThrow(
        "Invalid JSON data structure in file: data.json. Expected array or object."
      );
    });
  });
});
|
package/src/readers/json.ts
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import fs from "fs/promises";
|
|
2
|
-
import { DataReader, DataRow } from "./data-reader";
|
|
3
|
-
|
|
4
|
-
/**
 * `DataReader` implementation for `.json` files holding either an array of
 * rows or a single object.
 */
export class JsonReader extends DataReader {
  /** @returns The single extension handled by this reader. */
  getSupportedExtensions(): string[] {
    return ["json"];
  }

  /**
   * Reads the file as UTF-8 text and parses it as JSON.
   *
   * @param filePath Path of the JSON file to read.
   * @returns The parsed array as-is, or a one-element array wrapping a parsed
   *   object.
   * @throws The `JSON.parse` error for malformed content, or an `Error` when
   *   the parsed value is neither an array nor a non-null object.
   */
  async readFile(filePath: string): Promise<DataRow[]> {
    const parsed = JSON.parse(await fs.readFile(filePath, "utf8"));

    // Arrays are already a list of rows.
    if (Array.isArray(parsed)) {
      return parsed;
    }

    // Anything that is not a non-null object (null, string, number, boolean)
    // cannot be treated as a row.
    const isPlainValue = parsed === null || typeof parsed !== "object";
    if (isPlainValue) {
      throw new Error(
        `Invalid JSON data structure in file: ${filePath}. Expected array or object.`
      );
    }

    // A lone object becomes a single-row result.
    return [parsed];
  }
}
|
|
package/src/readers/jsonl.test.ts
DELETED
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
import fs from "fs/promises";
|
|
2
|
-
import { JsonlReader } from "./jsonl";
|
|
3
|
-
|
|
4
|
-
jest.mock("fs/promises");

describe("JsonlReader", () => {
  const mockFs = fs as jest.Mocked<typeof fs>;
  let reader: JsonlReader;

  // Makes the mocked `fs.readFile` resolve with `text`, then reads through
  // the reader under test.
  const readContent = (text: string) => {
    mockFs.readFile.mockResolvedValue(text);
    return reader.readFile("data.jsonl");
  };

  beforeEach(() => {
    reader = new JsonlReader();
    jest.clearAllMocks();
  });

  describe("getSupportedExtensions", () => {
    it("should return jsonl and ndjson as supported extensions", () => {
      const extensions = reader.getSupportedExtensions();

      expect(extensions).toEqual(["jsonl", "ndjson"]);
    });
  });

  describe("canHandle", () => {
    it("should return true for .jsonl and .ndjson files", () => {
      for (const file of ["data.jsonl", "data.ndjson", "path/to/file.jsonl"]) {
        expect(reader.canHandle(file)).toBe(true);
      }
    });

    it("should return false for non-jsonl files", () => {
      for (const file of ["data.json", "data.csv", "data"]) {
        expect(reader.canHandle(file)).toBe(false);
      }
    });
  });

  describe("readFile", () => {
    it("should read and parse JSONL file", async () => {
      const first = { id: 1, name: "Item 1" };
      const second = { id: 2, name: "Item 2" };

      const rows = await readContent(
        `${JSON.stringify(first)}\n${JSON.stringify(second)}`
      );

      expect(mockFs.readFile).toHaveBeenCalledWith("data.jsonl", "utf8");
      expect(rows).toEqual([first, second]);
    });

    it("should handle empty lines", async () => {
      const first = { id: 1, name: "Item 1" };
      const second = { id: 2, name: "Item 2" };

      // A blank line between the records and a trailing newline.
      const rows = await readContent(
        `${JSON.stringify(first)}\n\n${JSON.stringify(second)}\n`
      );

      expect(rows).toEqual([first, second]);
    });

    it("should handle single line", async () => {
      const only = { id: 1, name: "Item 1" };

      const rows = await readContent(JSON.stringify(only));

      expect(rows).toEqual([only]);
    });

    it("should throw error for invalid JSON on specific line", async () => {
      const first = { id: 1, name: "Item 1" };

      await expect(
        readContent(`${JSON.stringify(first)}\ninvalid json\n`)
      ).rejects.toThrow("Invalid JSON at line 2 in file: data.jsonl");
    });

    it("should handle empty file", async () => {
      const rows = await readContent("");

      expect(rows).toEqual([]);
    });

    it("should handle file with only whitespace", async () => {
      const rows = await readContent("\n\n \n\t\n");

      expect(rows).toEqual([]);
    });
  });
});
|
package/src/readers/jsonl.ts
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
import fs from "fs/promises";
|
|
2
|
-
import { DataReader, DataRow } from "./data-reader";
|
|
3
|
-
|
|
4
|
-
/**
 * `DataReader` implementation for JSON Lines files (`.jsonl` / `.ndjson`):
 * one JSON value per line, blank lines ignored.
 */
export class JsonlReader extends DataReader {
  /** @returns The extensions handled by this reader. */
  getSupportedExtensions(): string[] {
    return ["jsonl", "ndjson"];
  }

  /**
   * Reads the file as UTF-8 text and parses every non-blank line as JSON.
   *
   * @param filePath Path of the JSON Lines file to read.
   * @returns One entry per non-blank line, in file order.
   * @throws Error naming the 1-based line number within the file (blank lines
   *   included in the count) of the first line that is not valid JSON.
   */
  async readFile(filePath: string): Promise<DataRow[]> {
    const content = await fs.readFile(filePath, "utf8");
    const lines = content.split("\n");

    const results: DataRow[] = [];

    // Iterate over the unfiltered lines so that the index still matches the
    // position in the file; filtering blank lines first made the reported
    // line number too small whenever a blank line preceded the bad one.
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      if (!line.trim()) continue;

      try {
        results.push(JSON.parse(line));
      } catch (error) {
        throw new Error(
          `Invalid JSON at line ${i + 1} in file: ${filePath}. Error: ${error}`
        );
      }
    }

    return results;
  }
}
|