@sourcepress/knowledge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -0
- package/.turbo/turbo-test.log +21 -0
- package/dist/__tests__/graph-builder.test.d.ts +2 -0
- package/dist/__tests__/graph-builder.test.d.ts.map +1 -0
- package/dist/__tests__/graph-builder.test.js +122 -0
- package/dist/__tests__/graph-builder.test.js.map +1 -0
- package/dist/__tests__/graph-ops.test.d.ts +2 -0
- package/dist/__tests__/graph-ops.test.d.ts.map +1 -0
- package/dist/__tests__/graph-ops.test.js +181 -0
- package/dist/__tests__/graph-ops.test.js.map +1 -0
- package/dist/__tests__/ingestion.test.d.ts +2 -0
- package/dist/__tests__/ingestion.test.d.ts.map +1 -0
- package/dist/__tests__/ingestion.test.js +108 -0
- package/dist/__tests__/ingestion.test.js.map +1 -0
- package/dist/__tests__/json-file-store.test.d.ts +2 -0
- package/dist/__tests__/json-file-store.test.d.ts.map +1 -0
- package/dist/__tests__/json-file-store.test.js +180 -0
- package/dist/__tests__/json-file-store.test.js.map +1 -0
- package/dist/__tests__/knowledge-engine.test.d.ts +2 -0
- package/dist/__tests__/knowledge-engine.test.d.ts.map +1 -0
- package/dist/__tests__/knowledge-engine.test.js +152 -0
- package/dist/__tests__/knowledge-engine.test.js.map +1 -0
- package/dist/__tests__/knowledge-store.test.d.ts +2 -0
- package/dist/__tests__/knowledge-store.test.d.ts.map +1 -0
- package/dist/__tests__/knowledge-store.test.js +97 -0
- package/dist/__tests__/knowledge-store.test.js.map +1 -0
- package/dist/__tests__/scraper.test.d.ts +2 -0
- package/dist/__tests__/scraper.test.d.ts.map +1 -0
- package/dist/__tests__/scraper.test.js +66 -0
- package/dist/__tests__/scraper.test.js.map +1 -0
- package/dist/__tests__/sitemap-parser.test.d.ts +2 -0
- package/dist/__tests__/sitemap-parser.test.d.ts.map +1 -0
- package/dist/__tests__/sitemap-parser.test.js +75 -0
- package/dist/__tests__/sitemap-parser.test.js.map +1 -0
- package/dist/graph-builder.d.ts +17 -0
- package/dist/graph-builder.d.ts.map +1 -0
- package/dist/graph-builder.js +98 -0
- package/dist/graph-builder.js.map +1 -0
- package/dist/graph-ops.d.ts +21 -0
- package/dist/graph-ops.d.ts.map +1 -0
- package/dist/graph-ops.js +108 -0
- package/dist/graph-ops.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/ingestion/index.d.ts +4 -0
- package/dist/ingestion/index.d.ts.map +1 -0
- package/dist/ingestion/index.js +3 -0
- package/dist/ingestion/index.js.map +1 -0
- package/dist/ingestion/scraper.d.ts +22 -0
- package/dist/ingestion/scraper.d.ts.map +1 -0
- package/dist/ingestion/scraper.js +118 -0
- package/dist/ingestion/scraper.js.map +1 -0
- package/dist/ingestion/sitemap-parser.d.ts +32 -0
- package/dist/ingestion/sitemap-parser.d.ts.map +1 -0
- package/dist/ingestion/sitemap-parser.js +104 -0
- package/dist/ingestion/sitemap-parser.js.map +1 -0
- package/dist/ingestion/types.d.ts +58 -0
- package/dist/ingestion/types.d.ts.map +1 -0
- package/dist/ingestion/types.js +2 -0
- package/dist/ingestion/types.js.map +1 -0
- package/dist/json-file-store.d.ts +19 -0
- package/dist/json-file-store.d.ts.map +1 -0
- package/dist/json-file-store.js +100 -0
- package/dist/json-file-store.js.map +1 -0
- package/dist/knowledge-engine.d.ts +45 -0
- package/dist/knowledge-engine.d.ts.map +1 -0
- package/dist/knowledge-engine.js +160 -0
- package/dist/knowledge-engine.js.map +1 -0
- package/dist/knowledge-store.d.ts +14 -0
- package/dist/knowledge-store.d.ts.map +1 -0
- package/dist/knowledge-store.js +40 -0
- package/dist/knowledge-store.js.map +1 -0
- package/dist/types.d.ts +67 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +26 -0
- package/src/__tests__/graph-builder.test.ts +129 -0
- package/src/__tests__/graph-ops.test.ts +189 -0
- package/src/__tests__/ingestion.test.ts +127 -0
- package/src/__tests__/json-file-store.test.ts +206 -0
- package/src/__tests__/knowledge-engine.test.ts +177 -0
- package/src/__tests__/knowledge-store.test.ts +111 -0
- package/src/__tests__/scraper.test.ts +74 -0
- package/src/__tests__/sitemap-parser.test.ts +85 -0
- package/src/graph-builder.ts +109 -0
- package/src/graph-ops.ts +129 -0
- package/src/index.ts +27 -0
- package/src/ingestion/index.ts +10 -0
- package/src/ingestion/scraper.ts +137 -0
- package/src/ingestion/sitemap-parser.ts +119 -0
- package/src/ingestion/types.ts +57 -0
- package/src/json-file-store.ts +127 -0
- package/src/knowledge-engine.ts +217 -0
- package/src/knowledge-store.ts +49 -0
- package/src/types.ts +76 -0
- package/tsconfig.json +5 -0
- package/vitest.config.ts +2 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { GraphBuilder } from "../graph-builder.js";
|
|
3
|
+
import type { ExtractedEntity, ExtractedRelation } from "../types.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Builds the entity fixture shared by the GraphBuilder tests.
 *
 * Every call returns four freshly created entities (one client, two
 * technologies, one service) that reference three distinct source files.
 */
function makeEntities(): ExtractedEntity[] {
	// Two of the entities are extracted from the same client notes file.
	const acmeNotes = "knowledge/clients/acme.md";

	const acme: ExtractedEntity = {
		type: "client",
		name: "Acme Corp",
		aliases: ["Acme", "ACME"],
		confidence: 0.95,
		source_file: acmeNotes,
	};
	const nextJs: ExtractedEntity = {
		type: "technology",
		name: "Next.js",
		aliases: ["NextJS"],
		confidence: 0.99,
		source_file: acmeNotes,
	};
	const react: ExtractedEntity = {
		type: "technology",
		name: "React",
		aliases: [],
		confidence: 0.9,
		source_file: "knowledge/tech/react.md",
	};
	const webDevelopment: ExtractedEntity = {
		type: "service",
		name: "Web Development",
		aliases: ["Webbutveckling"],
		confidence: 0.85,
		source_file: "knowledge/services/web.md",
	};

	return [acme, nextJs, react, webDevelopment];
}
|
|
37
|
+
|
|
38
|
+
/**
 * Builds the relation fixture shared by the GraphBuilder tests.
 *
 * Every call returns three freshly created relations between the entities
 * produced by `makeEntities`, with confidences 0.9, 0.99 and 0.85.
 */
function makeRelations(): ExtractedRelation[] {
	const acmeNotes = "knowledge/clients/acme.md";

	const acmeUsesNext: ExtractedRelation = {
		from_entity: "Acme Corp",
		to_entity: "Next.js",
		relation_type: "uses",
		confidence: 0.9,
		evidence: "Acme Corp is migrating to Next.js",
		source_file: acmeNotes,
	};
	const nextBuiltOnReact: ExtractedRelation = {
		from_entity: "Next.js",
		to_entity: "React",
		relation_type: "built_on",
		confidence: 0.99,
		evidence: "Next.js is a React framework",
		source_file: "knowledge/tech/react.md",
	};
	const acmeNeedsWebDev: ExtractedRelation = {
		from_entity: "Acme Corp",
		to_entity: "Web Development",
		relation_type: "needs",
		confidence: 0.85,
		evidence: "Acme needs web development services",
		source_file: acmeNotes,
	};

	return [acmeUsesNext, nextBuiltOnReact, acmeNeedsWebDev];
}
|
|
66
|
+
|
|
67
|
+
// Behavioural tests for GraphBuilder: assembling, deduplicating, filtering and clustering.
describe("GraphBuilder", () => {
	it("builds a graph from entities and relations", () => {
		const graphBuilder = new GraphBuilder();
		graphBuilder.addEntities(makeEntities());
		graphBuilder.addRelations(makeRelations());

		const { entities, relations, file_count, built_at } = graphBuilder.build();

		expect(entities.size).toBe(4);
		expect(relations).toHaveLength(3);
		// The fixtures reference three distinct source files.
		expect(file_count).toBe(3);
		expect(built_at).toBeTruthy();
	});

	it("deduplicates entities by name", () => {
		const graphBuilder = new GraphBuilder();
		graphBuilder.addEntities(makeEntities());
		// Same name as an existing entity, lower confidence, one new alias.
		const duplicate: ExtractedEntity = {
			type: "client",
			name: "Acme Corp",
			aliases: ["Acme Inc"],
			confidence: 0.8,
			source_file: "knowledge/other.md",
		};
		graphBuilder.addEntities([duplicate]);

		const merged = graphBuilder.build().entities.get("Acme Corp");

		expect(merged).toBeDefined();
		// The higher confidence is kept and aliases from both inputs are present.
		expect(merged?.confidence).toBe(0.95);
		expect(merged?.aliases).toContain("Acme");
		expect(merged?.aliases).toContain("ACME");
		expect(merged?.aliases).toContain("Acme Inc");
	});

	it("filters relations below confidence threshold", () => {
		const graphBuilder = new GraphBuilder({ minConfidence: 0.88 });
		graphBuilder.addEntities(makeEntities());
		graphBuilder.addRelations(makeRelations());

		const { relations } = graphBuilder.build();

		// Only the 0.9 and 0.99 relations are at or above the threshold.
		expect(relations).toHaveLength(2);
		expect(relations.every((relation) => relation.confidence >= 0.88)).toBe(true);
	});

	it("computes clusters from connected entities", () => {
		const graphBuilder = new GraphBuilder();
		graphBuilder.addEntities(makeEntities());
		graphBuilder.addRelations(makeRelations());

		const { clusters } = graphBuilder.build();

		expect(clusters.length).toBeGreaterThanOrEqual(1);
		expect(clusters[0].entities.length).toBeGreaterThanOrEqual(2);
	});

	it("handles empty input", () => {
		const { entities, relations, clusters } = new GraphBuilder().build();

		expect(entities.size).toBe(0);
		expect(relations).toHaveLength(0);
		expect(clusters).toHaveLength(0);
	});
});
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import type { ContentFile } from "@sourcepress/core";
|
|
2
|
+
import { beforeEach, describe, expect, it } from "vitest";
|
|
3
|
+
import { GraphOps } from "../graph-ops.js";
|
|
4
|
+
import type { ExtractedEntity, ExtractedRelation, KnowledgeGraph } from "../types.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Builds the knowledge-graph fixture used by the GraphOps tests.
 *
 * The graph holds four entities keyed by their own name, two relations
 * (Acme Corp -> Next.js -> React) and a single cluster. "Orphan Entity"
 * takes part in no relation and in no cluster.
 */
function makeGraph(): KnowledgeGraph {
	const entityList: ExtractedEntity[] = [
		{
			type: "client",
			name: "Acme Corp",
			aliases: ["Acme"],
			confidence: 0.95,
			source_file: "knowledge/clients/acme.md",
		},
		{
			type: "technology",
			name: "Next.js",
			aliases: ["NextJS"],
			confidence: 0.99,
			source_file: "knowledge/tech/nextjs.md",
		},
		{
			type: "technology",
			name: "React",
			aliases: [],
			confidence: 0.9,
			source_file: "knowledge/tech/react.md",
		},
		{
			type: "concept",
			name: "Orphan Entity",
			aliases: [],
			confidence: 0.7,
			source_file: "knowledge/misc/orphan.md",
		},
	];
	// Each entity is stored under its own name, in declaration order.
	const entities = new Map<string, ExtractedEntity>(
		entityList.map((entity): [string, ExtractedEntity] => [entity.name, entity]),
	);

	const relations: ExtractedRelation[] = [
		{
			from_entity: "Acme Corp",
			to_entity: "Next.js",
			relation_type: "uses",
			confidence: 0.9,
			evidence: "Acme uses Next.js",
			source_file: "knowledge/clients/acme.md",
		},
		{
			from_entity: "Next.js",
			to_entity: "React",
			relation_type: "built_on",
			confidence: 0.99,
			evidence: "Next.js is built on React",
			source_file: "knowledge/tech/nextjs.md",
		},
	];

	const clusters = [
		{
			id: "cluster-0",
			name: "Acme Corp",
			entities: ["Acme Corp", "Next.js", "React"],
			coherence_score: 0.95,
		},
	];

	return {
		entities,
		relations,
		clusters,
		built_at: "2026-04-04T10:00:00Z",
		file_count: 4,
	};
}
|
|
82
|
+
|
|
83
|
+
/**
 * Builds the generated-content fixture used by the gap and staleness tests.
 *
 * Returns two content files: a case study generated on 2026-04-01 from the
 * Acme client notes, and a post generated on 2026-04-03 from the React notes.
 */
function makeContentFiles(): ContentFile[] {
	const generator = "claude-sonnet-4-5-20250514";

	const acmeCase: ContentFile = {
		collection: "cases",
		slug: "acme",
		path: "content/cases/acme.mdx",
		frontmatter: { title: "Acme Corp Case Study", client: "acme-corp" },
		body: "Acme Corp migrated to Next.js with our help.",
		provenance: {
			generated_by: generator,
			generated_at: "2026-04-01T10:00:00Z",
			source_files: ["knowledge/clients/acme.md"],
		},
	};
	const reactGuide: ContentFile = {
		collection: "posts",
		slug: "react-guide",
		path: "content/posts/react-guide.mdx",
		frontmatter: { title: "React Guide" },
		body: "A guide to React.",
		provenance: {
			generated_by: generator,
			generated_at: "2026-04-03T10:00:00Z",
			source_files: ["knowledge/tech/react.md"],
		},
	};

	return [acmeCase, reactGuide];
}
|
|
111
|
+
|
|
112
|
+
// Behavioural tests for GraphOps; every test runs against a fresh fixture graph.
describe("GraphOps", () => {
	let ops: GraphOps;

	beforeEach(() => {
		ops = new GraphOps(makeGraph());
	});

	describe("query", () => {
		it("returns entity with relations and related entities", () => {
			const hit = ops.query("Acme Corp");

			expect(hit).not.toBeNull();
			expect(hit?.entity.name).toBe("Acme Corp");
			expect(hit?.relations).toHaveLength(1);
			expect(hit?.related_entities).toHaveLength(1);
			expect(hit?.related_entities[0].name).toBe("Next.js");
		});

		it("returns null for unknown entity", () => {
			expect(ops.query("Unknown")).toBeNull();
		});

		it("finds entity by alias", () => {
			// "Acme" is only an alias of "Acme Corp" in the fixture graph.
			const hit = ops.query("Acme");

			expect(hit).not.toBeNull();
			expect(hit?.entity.name).toBe("Acme Corp");
		});

		it("collects source files for entity", () => {
			const hit = ops.query("Next.js");

			expect(hit).not.toBeNull();
			// The entity's own source file ...
			expect(hit?.files).toContain("knowledge/tech/nextjs.md");
			// ... and the source file of the relation that points at it.
			expect(hit?.files).toContain("knowledge/clients/acme.md");
		});
	});

	describe("findGaps", () => {
		it("finds entities with knowledge but no content", () => {
			const gaps = ops.findGaps(makeContentFiles());

			expect(gaps.length).toBeGreaterThanOrEqual(1);
			const orphanGap = gaps.find((gap) => gap.entity_name === "Orphan Entity");
			expect(orphanGap).toBeDefined();
			expect(orphanGap?.content_file_count).toBe(0);
		});
	});

	describe("findStale", () => {
		it("detects content older than source knowledge", () => {
			// The Acme notes changed after the case study was generated (04-01);
			// the React notes changed before the React guide was generated (04-03).
			const knowledgeTimestamps = {
				"knowledge/clients/acme.md": "2026-04-03T10:00:00Z",
				"knowledge/tech/react.md": "2026-04-02T10:00:00Z",
			};

			const stale = ops.findStale(makeContentFiles(), knowledgeTimestamps);

			expect(stale).toHaveLength(1);
			expect(stale[0].content_path).toBe("content/cases/acme.mdx");
			expect(stale[0].stale_sources).toContain("knowledge/clients/acme.md");
		});

		it("returns empty when all content is fresh", () => {
			// Both knowledge files predate every generated content file.
			const knowledgeTimestamps = {
				"knowledge/clients/acme.md": "2026-03-01T10:00:00Z",
				"knowledge/tech/react.md": "2026-03-01T10:00:00Z",
			};

			const stale = ops.findStale(makeContentFiles(), knowledgeTimestamps);

			expect(stale).toHaveLength(0);
		});
	});

	describe("getStats", () => {
		it("returns graph statistics", () => {
			const { entity_count, relation_count, cluster_count, file_count } = ops.getStats();

			expect(entity_count).toBe(4);
			expect(relation_count).toBe(2);
			expect(cluster_count).toBe(1);
			expect(file_count).toBe(4);
		});
	});
});
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import type { BudgetTracker, ResolvedProvider } from "@sourcepress/ai";
|
|
2
|
+
import { describe, expect, it, vi } from "vitest";
|
|
3
|
+
import { KnowledgeEngine } from "../knowledge-engine.js";
|
|
4
|
+
import { InMemoryKnowledgeStore } from "../knowledge-store.js";
|
|
5
|
+
|
|
6
|
+
// Replace the AI package so `classify` and `extract` resolve to fixed values.
// The canned payloads are declared inside the factory because the factory
// must not depend on bindings declared elsewhere in this module.
vi.mock("@sourcepress/ai", () => {
	const classification = {
		type: "article",
		quality: "structured",
		quality_score: 0.8,
	};
	const extraction = {
		entities: [{ type: "company", name: "Acme Corp", confidence: 0.9 }],
		relations: [],
	};

	return {
		classify: vi.fn().mockResolvedValue(classification),
		extract: vi.fn().mockResolvedValue(extraction),
	};
});
|
|
18
|
+
|
|
19
|
+
/**
 * HTML page served by the mock fetcher for every non-sitemap URL.
 * The body mentions "Acme Corp", which the importUrl test looks for in the
 * scraped result.
 */
const ARTICLE_HTML = `<!DOCTYPE html>
<html><head><title>Acme Services</title></head>
<body><article>
<h1>Acme Corp Services</h1>
<p>Acme Corp provides enterprise software solutions for modern businesses.
Their platform handles everything from customer management to analytics.
Founded in 2010, Acme has grown to serve over 500 enterprise clients
worldwide with their innovative approach to business software.</p>
<h2>Key Offerings</h2>
<p>The company specializes in cloud-native applications, data pipelines,
and AI-powered automation tools that help businesses scale efficiently.</p>
</article></body></html>`;
|
|
31
|
+
|
|
32
|
+
/**
 * Sitemap served by the mock fetcher for URLs ending in "sitemap.xml".
 * Lists three pages: two under /services/ and one under /blog/.
 */
const SITEMAP_XML = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url><loc>https://acme.com/services/web</loc></url>
<url><loc>https://acme.com/services/cloud</loc></url>
<url><loc>https://acme.com/blog/post-1</loc></url>
</urlset>`;
|
|
38
|
+
|
|
39
|
+
/**
 * Creates a `fetch` stand-in that always answers 200 OK.
 *
 * URLs ending in "sitemap.xml" receive the sitemap fixture; every other URL
 * receives the article fixture. Only `ok`, `status` and `text()` are provided
 * on the response object.
 */
function createMockFetcher() {
	const okResponse = (payload: string) => ({
		ok: true,
		status: 200,
		text: () => Promise.resolve(payload),
	});
	const respond = async (url: string) =>
		okResponse(url.endsWith("sitemap.xml") ? SITEMAP_XML : ARTICLE_HTML);

	return vi.fn().mockImplementation(respond) as unknown as typeof globalThis.fetch;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Creates a KnowledgeEngine wired to an in-memory store, a placeholder
 * provider and a budget tracker whose `check` and `record` are mock functions.
 *
 * @param fetcher Optional `fetch` replacement, passed to the engine unchanged.
 */
function createEngine(fetcher?: typeof globalThis.fetch) {
	const provider = {
		provider: "mock",
		model: "mock",
		apiKey: "mock",
	} as unknown as ResolvedProvider;
	const budget = { check: vi.fn(), record: vi.fn() } as unknown as BudgetTracker;

	return new KnowledgeEngine(new InMemoryKnowledgeStore(), provider, budget, fetcher);
}
|
|
58
|
+
|
|
59
|
+
// Ingestion tests for KnowledgeEngine; all network access goes through mock fetchers.
describe("KnowledgeEngine ingestion", () => {
	it("importUrl scrapes and stores a single URL", async () => {
		const engine = createEngine(createMockFetcher());

		const imported = await engine.importUrl("https://acme.com/services/web");

		expect(imported.path).toBe("knowledge/acme.com/services/web.md");
		expect(imported.source).toBe("url");
		expect(imported.source_url).toBe("https://acme.com/services/web");
		expect(imported.body).toContain("Acme Corp");
	});

	it("parseSitemap returns grouped sections", async () => {
		const engine = createEngine(createMockFetcher());

		const sitemap = await engine.parseSitemap("https://acme.com/sitemap.xml");

		expect(sitemap.total_urls).toBe(3);
		// Two of the three sitemap URLs live under /services/.
		const services = sitemap.sections.find((section) => section.pattern === "/services/*");
		expect(services).toBeDefined();
		expect(services?.count).toBe(2);
	});

	it("importBatch processes multiple URLs with progress", async () => {
		const engine = createEngine(createMockFetcher());
		const onProgress = vi.fn();
		const urls = ["https://acme.com/services/web", "https://acme.com/services/cloud"];

		const batch = await engine.importBatch(urls, onProgress);

		expect(batch.completed).toBe(2);
		expect(batch.failed).toBe(0);
		expect(batch.results).toHaveLength(2);
		// One progress notification per URL.
		expect(onProgress).toHaveBeenCalledTimes(2);
	});

	it("importBatch handles failures gracefully", async () => {
		// First fetch succeeds, second one answers with HTTP 500.
		const okResponse = { ok: true, status: 200, text: () => Promise.resolve(ARTICLE_HTML) };
		const errorResponse = { ok: false, status: 500, statusText: "Server Error" };
		const fetcher = vi
			.fn()
			.mockResolvedValueOnce(okResponse)
			.mockResolvedValueOnce(errorResponse) as unknown as typeof globalThis.fetch;
		const engine = createEngine(fetcher);

		const batch = await engine.importBatch([
			"https://acme.com/services/web",
			"https://acme.com/broken",
		]);

		// NOTE(review): `completed` is 2 although one URL fails — presumably it
		// counts processed URLs rather than successes; confirm against importBatch.
		expect(batch.completed).toBe(2);
		expect(batch.failed).toBe(1);
		expect(batch.results[0].success).toBe(true);
		expect(batch.results[1].success).toBe(false);
		expect(batch.results[1].error).toContain("500");
	});

	it("urlToPath converts URLs to knowledge paths", async () => {
		const engine = createEngine(createMockFetcher());

		const imported = await engine.importUrl("https://example.com/blog/my-post");

		expect(imported.path).toBe("knowledge/example.com/blog/my-post.md");
	});
});
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import type { KnowledgeFile } from "@sourcepress/core";
|
|
5
|
+
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
|
6
|
+
import { JsonFileStore } from "../json-file-store.js";
|
|
7
|
+
import type { KnowledgeGraph } from "../types.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Builds a knowledge-file fixture for the JsonFileStore tests.
 *
 * @param overrides Fields that replace the defaults; any field not given
 *   keeps its default value.
 * @returns A new object combining the defaults with `overrides`.
 */
function makeKnowledgeFile(overrides: Partial<KnowledgeFile> = {}): KnowledgeFile {
	const defaults: KnowledgeFile = {
		path: "knowledge/clients/acme.md",
		type: "project-notes",
		quality: "structured",
		quality_score: 8,
		entities: [
			{ type: "client", name: "Acme Corp" },
			{ type: "technology", name: "Next.js" },
		],
		ingested_at: "2026-04-04T10:00:00Z",
		source: "manual",
		body: "Meeting with Acme Corp about Next.js migration.",
	};

	// Later spread wins, so caller-supplied fields take precedence.
	return { ...defaults, ...overrides };
}
|
|
25
|
+
|
|
26
|
+
/**
 * Builds the minimal graph fixture used by the persistence tests: one entity
 * stored under the key "acme-corp", one relation and no clusters.
 */
function makeGraph(): KnowledgeGraph {
	const entities: KnowledgeGraph["entities"] = new Map();
	// The map key ("acme-corp") differs from the entity's display name.
	entities.set("acme-corp", {
		type: "client",
		name: "Acme Corp",
		aliases: ["Acme"],
		confidence: 0.95,
		source_file: "knowledge/clients/acme.md",
	});

	const relations: KnowledgeGraph["relations"] = [
		{
			from_entity: "acme-corp",
			to_entity: "nextjs",
			relation_type: "uses",
			confidence: 0.9,
			evidence: "Migration project",
			source_file: "knowledge/clients/acme.md",
		},
	];

	return {
		entities,
		relations,
		clusters: [],
		built_at: "2026-04-04T12:00:00Z",
		file_count: 1,
	};
}
|
|
55
|
+
|
|
56
|
+
// Scratch directory for the current test; reassigned before every test.
let testDir: string;

// Behavioural tests for JsonFileStore, run against a unique temp directory per test.
describe("JsonFileStore", () => {
	beforeEach(() => {
		// Only the path is chosen here; the directory itself is not created, so
		// the first test can verify that the store creates it.
		testDir = join(tmpdir(), `sourcepress-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
	});

	afterEach(() => {
		// `force: true` turns removal of a missing directory into a no-op, which
		// replaces the separate existence check and its check-then-act gap.
		rmSync(testDir, { recursive: true, force: true });
	});

	it("creates the directory if it does not exist", () => {
		new JsonFileStore(testDir);
		expect(existsSync(testDir)).toBe(true);
	});

	describe("store and retrieve", () => {
		it("stores and retrieves a knowledge file", async () => {
			const store = new JsonFileStore(testDir);
			const file = makeKnowledgeFile();
			await store.store(file);
			const retrieved = await store.retrieve(file.path);
			expect(retrieved).toEqual(file);
		});

		it("returns null for non-existent file", async () => {
			const store = new JsonFileStore(testDir);
			const retrieved = await store.retrieve("nonexistent.md");
			expect(retrieved).toBeNull();
		});

		it("overwrites existing file at same path", async () => {
			const store = new JsonFileStore(testDir);
			await store.store(makeKnowledgeFile());
			await store.store(makeKnowledgeFile({ quality_score: 9 }));
			const retrieved = await store.retrieve("knowledge/clients/acme.md");
			expect(retrieved?.quality_score).toBe(9);
		});
	});

	describe("persistence across instances", () => {
		it("persists knowledge files to disk", async () => {
			const store1 = new JsonFileStore(testDir);
			await store1.store(makeKnowledgeFile({ path: "a.md" }));
			await store1.store(makeKnowledgeFile({ path: "b.md" }));

			// A second instance on the same directory must see the stored files.
			const store2 = new JsonFileStore(testDir);
			expect(await store2.count()).toBe(2);
			expect(await store2.retrieve("a.md")).not.toBeNull();
		});

		it("persists graph to disk", async () => {
			const store1 = new JsonFileStore(testDir);
			const graph = makeGraph();
			await store1.saveGraph(graph);

			const store2 = new JsonFileStore(testDir);
			const loaded = await store2.loadGraph();
			expect(loaded).not.toBeNull();
			expect(loaded?.entities.get("acme-corp")?.name).toBe("Acme Corp");
			expect(loaded?.relations).toHaveLength(1);
			expect(loaded?.built_at).toBe("2026-04-04T12:00:00Z");
		});
	});

	describe("list", () => {
		it("lists all stored files", async () => {
			const store = new JsonFileStore(testDir);
			await store.store(makeKnowledgeFile({ path: "a.md" }));
			await store.store(makeKnowledgeFile({ path: "b.md" }));
			const files = await store.list();
			expect(files).toHaveLength(2);
		});

		it("filters by type", async () => {
			const store = new JsonFileStore(testDir);
			await store.store(makeKnowledgeFile({ path: "a.md", type: "project-notes" }));
			await store.store(makeKnowledgeFile({ path: "b.md", type: "transcript" }));
			const files = await store.list({ type: "transcript" });
			expect(files).toHaveLength(1);
			expect(files[0].type).toBe("transcript");
		});

		it("filters by quality", async () => {
			const store = new JsonFileStore(testDir);
			await store.store(makeKnowledgeFile({ path: "a.md", quality: "structured" }));
			await store.store(makeKnowledgeFile({ path: "b.md", quality: "draft" }));
			const files = await store.list({ quality: "draft" });
			expect(files).toHaveLength(1);
		});

		it("filters by source", async () => {
			const store = new JsonFileStore(testDir);
			await store.store(makeKnowledgeFile({ path: "a.md", source: "manual" }));
			await store.store(makeKnowledgeFile({ path: "b.md", source: "url" }));
			const files = await store.list({ source: "url" });
			expect(files).toHaveLength(1);
		});
	});

	describe("delete", () => {
		it("deletes an existing file and persists", async () => {
			const store = new JsonFileStore(testDir);
			await store.store(makeKnowledgeFile());
			const deleted = await store.delete("knowledge/clients/acme.md");
			expect(deleted).toBe(true);

			// The deletion must also be visible to a new instance.
			const store2 = new JsonFileStore(testDir);
			expect(await store2.retrieve("knowledge/clients/acme.md")).toBeNull();
		});

		it("returns false for non-existent file", async () => {
			const store = new JsonFileStore(testDir);
			expect(await store.delete("nonexistent.md")).toBe(false);
		});
	});

	describe("count", () => {
		it("returns 0 for empty store", async () => {
			const store = new JsonFileStore(testDir);
			expect(await store.count()).toBe(0);
		});

		it("returns correct count", async () => {
			const store = new JsonFileStore(testDir);
			await store.store(makeKnowledgeFile({ path: "a.md" }));
			await store.store(makeKnowledgeFile({ path: "b.md" }));
			expect(await store.count()).toBe(2);
		});
	});

	describe("graph", () => {
		it("returns null when no graph saved", async () => {
			const store = new JsonFileStore(testDir);
			expect(await store.loadGraph()).toBeNull();
		});

		it("preserves Map structure through serialization", async () => {
			const store = new JsonFileStore(testDir);
			const graph = makeGraph();
			await store.saveGraph(graph);

			const store2 = new JsonFileStore(testDir);
			const loaded = await store2.loadGraph();
			// `entities` must come back as a real Map, not a plain object.
			expect(loaded?.entities).toBeInstanceOf(Map);
			expect(loaded?.entities.size).toBe(1);
		});
	});
});
|