@sourcepress/knowledge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/.turbo/turbo-build.log +4 -0
  2. package/.turbo/turbo-test.log +21 -0
  3. package/dist/__tests__/graph-builder.test.d.ts +2 -0
  4. package/dist/__tests__/graph-builder.test.d.ts.map +1 -0
  5. package/dist/__tests__/graph-builder.test.js +122 -0
  6. package/dist/__tests__/graph-builder.test.js.map +1 -0
  7. package/dist/__tests__/graph-ops.test.d.ts +2 -0
  8. package/dist/__tests__/graph-ops.test.d.ts.map +1 -0
  9. package/dist/__tests__/graph-ops.test.js +181 -0
  10. package/dist/__tests__/graph-ops.test.js.map +1 -0
  11. package/dist/__tests__/ingestion.test.d.ts +2 -0
  12. package/dist/__tests__/ingestion.test.d.ts.map +1 -0
  13. package/dist/__tests__/ingestion.test.js +108 -0
  14. package/dist/__tests__/ingestion.test.js.map +1 -0
  15. package/dist/__tests__/json-file-store.test.d.ts +2 -0
  16. package/dist/__tests__/json-file-store.test.d.ts.map +1 -0
  17. package/dist/__tests__/json-file-store.test.js +180 -0
  18. package/dist/__tests__/json-file-store.test.js.map +1 -0
  19. package/dist/__tests__/knowledge-engine.test.d.ts +2 -0
  20. package/dist/__tests__/knowledge-engine.test.d.ts.map +1 -0
  21. package/dist/__tests__/knowledge-engine.test.js +152 -0
  22. package/dist/__tests__/knowledge-engine.test.js.map +1 -0
  23. package/dist/__tests__/knowledge-store.test.d.ts +2 -0
  24. package/dist/__tests__/knowledge-store.test.d.ts.map +1 -0
  25. package/dist/__tests__/knowledge-store.test.js +97 -0
  26. package/dist/__tests__/knowledge-store.test.js.map +1 -0
  27. package/dist/__tests__/scraper.test.d.ts +2 -0
  28. package/dist/__tests__/scraper.test.d.ts.map +1 -0
  29. package/dist/__tests__/scraper.test.js +66 -0
  30. package/dist/__tests__/scraper.test.js.map +1 -0
  31. package/dist/__tests__/sitemap-parser.test.d.ts +2 -0
  32. package/dist/__tests__/sitemap-parser.test.d.ts.map +1 -0
  33. package/dist/__tests__/sitemap-parser.test.js +75 -0
  34. package/dist/__tests__/sitemap-parser.test.js.map +1 -0
  35. package/dist/graph-builder.d.ts +17 -0
  36. package/dist/graph-builder.d.ts.map +1 -0
  37. package/dist/graph-builder.js +98 -0
  38. package/dist/graph-builder.js.map +1 -0
  39. package/dist/graph-ops.d.ts +21 -0
  40. package/dist/graph-ops.d.ts.map +1 -0
  41. package/dist/graph-ops.js +108 -0
  42. package/dist/graph-ops.js.map +1 -0
  43. package/dist/index.d.ts +10 -0
  44. package/dist/index.d.ts.map +1 -0
  45. package/dist/index.js +8 -0
  46. package/dist/index.js.map +1 -0
  47. package/dist/ingestion/index.d.ts +4 -0
  48. package/dist/ingestion/index.d.ts.map +1 -0
  49. package/dist/ingestion/index.js +3 -0
  50. package/dist/ingestion/index.js.map +1 -0
  51. package/dist/ingestion/scraper.d.ts +22 -0
  52. package/dist/ingestion/scraper.d.ts.map +1 -0
  53. package/dist/ingestion/scraper.js +118 -0
  54. package/dist/ingestion/scraper.js.map +1 -0
  55. package/dist/ingestion/sitemap-parser.d.ts +32 -0
  56. package/dist/ingestion/sitemap-parser.d.ts.map +1 -0
  57. package/dist/ingestion/sitemap-parser.js +104 -0
  58. package/dist/ingestion/sitemap-parser.js.map +1 -0
  59. package/dist/ingestion/types.d.ts +58 -0
  60. package/dist/ingestion/types.d.ts.map +1 -0
  61. package/dist/ingestion/types.js +2 -0
  62. package/dist/ingestion/types.js.map +1 -0
  63. package/dist/json-file-store.d.ts +19 -0
  64. package/dist/json-file-store.d.ts.map +1 -0
  65. package/dist/json-file-store.js +100 -0
  66. package/dist/json-file-store.js.map +1 -0
  67. package/dist/knowledge-engine.d.ts +45 -0
  68. package/dist/knowledge-engine.d.ts.map +1 -0
  69. package/dist/knowledge-engine.js +160 -0
  70. package/dist/knowledge-engine.js.map +1 -0
  71. package/dist/knowledge-store.d.ts +14 -0
  72. package/dist/knowledge-store.d.ts.map +1 -0
  73. package/dist/knowledge-store.js +40 -0
  74. package/dist/knowledge-store.js.map +1 -0
  75. package/dist/types.d.ts +67 -0
  76. package/dist/types.d.ts.map +1 -0
  77. package/dist/types.js +2 -0
  78. package/dist/types.js.map +1 -0
  79. package/package.json +26 -0
  80. package/src/__tests__/graph-builder.test.ts +129 -0
  81. package/src/__tests__/graph-ops.test.ts +189 -0
  82. package/src/__tests__/ingestion.test.ts +127 -0
  83. package/src/__tests__/json-file-store.test.ts +206 -0
  84. package/src/__tests__/knowledge-engine.test.ts +177 -0
  85. package/src/__tests__/knowledge-store.test.ts +111 -0
  86. package/src/__tests__/scraper.test.ts +74 -0
  87. package/src/__tests__/sitemap-parser.test.ts +85 -0
  88. package/src/graph-builder.ts +109 -0
  89. package/src/graph-ops.ts +129 -0
  90. package/src/index.ts +27 -0
  91. package/src/ingestion/index.ts +10 -0
  92. package/src/ingestion/scraper.ts +137 -0
  93. package/src/ingestion/sitemap-parser.ts +119 -0
  94. package/src/ingestion/types.ts +57 -0
  95. package/src/json-file-store.ts +127 -0
  96. package/src/knowledge-engine.ts +217 -0
  97. package/src/knowledge-store.ts +49 -0
  98. package/src/types.ts +76 -0
  99. package/tsconfig.json +5 -0
  100. package/vitest.config.ts +2 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sitemap-parser.test.js","sourceRoot":"","sources":["../../src/__tests__/sitemap-parser.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAE/D,MAAM,cAAc,GAAG;;;;;;;;;;UAUb,CAAC;AAEX,SAAS,WAAW,CAAC,GAAW,EAAE,MAAM,GAAG,GAAG;IAC7C,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;QAChC,EAAE,EAAE,MAAM,IAAI,GAAG,IAAI,MAAM,GAAG,GAAG;QACjC,MAAM;QACN,UAAU,EAAE,IAAI;QAChB,IAAI,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC;KAChC,CAAuC,CAAC;AAC1C,CAAC;AAED,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErE,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAEzD,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,aAAa,CAAC,CAAC;QAC1E,MAAM,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;QAC/B,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAEhC,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC;QAClE,MAAM,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;QAC3B,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE;QACrC,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,WAAW,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC;QACvD,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACtF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;QAC1C,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,WAAW,CAAC,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC;IAChG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,iC
AAiC,CAAC,CAAC;QACrE,MAAM,QAAQ,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,EAAE;YAC1C,WAAW,EAAE,iCAAiC;YAC9C,OAAO,EAAE,CAAC,aAAa,CAAC;SACxB,CAAC,CAAC;QACH,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACjC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACrE,MAAM,QAAQ,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,EAAE;YAC1C,WAAW,EAAE,iCAAiC;YAC9C,OAAO,EAAE,CAAC,SAAS,CAAC;SACpB,CAAC,CAAC;QACH,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/D,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,mBAAmB;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;QAC7C,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QACrE,MAAM,QAAQ,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,EAAE;YAC1C,WAAW,EAAE,iCAAiC;YAC9C,OAAO,EAAE,CAAC,aAAa,EAAE,SAAS,CAAC;YACnC,OAAO,EAAE,CAAC,SAAS,CAAC;SACpB,CAAC,CAAC;QACH,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACjC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpE,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC"}
@@ -0,0 +1,17 @@
1
+ import type { ExtractedEntity, ExtractedRelation, KnowledgeGraph } from "./types.js";
2
+ export interface GraphBuilderOptions {
3
+ minConfidence?: number;
4
+ }
5
+ export declare class GraphBuilder {
6
+ private entities;
7
+ private relations;
8
+ private sourceFiles;
9
+ private minConfidence;
10
+ constructor(options?: GraphBuilderOptions);
11
+ addEntities(entities: ExtractedEntity[]): void;
12
+ getEntities(): Map<string, ExtractedEntity>;
13
+ addRelations(relations: ExtractedRelation[]): void;
14
+ build(): KnowledgeGraph;
15
+ private buildClusters;
16
+ }
17
+ //# sourceMappingURL=graph-builder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"graph-builder.d.ts","sourceRoot":"","sources":["../src/graph-builder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAiB,eAAe,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAEpG,MAAM,WAAW,mBAAmB;IACnC,aAAa,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,qBAAa,YAAY;IACxB,OAAO,CAAC,QAAQ,CAA2C;IAC3D,OAAO,CAAC,SAAS,CAA2B;IAC5C,OAAO,CAAC,WAAW,CAA0B;IAC7C,OAAO,CAAC,aAAa,CAAS;gBAElB,OAAO,GAAE,mBAAwB;IAI7C,WAAW,CAAC,QAAQ,EAAE,eAAe,EAAE,GAAG,IAAI;IAmB9C,WAAW,IAAI,GAAG,CAAC,MAAM,EAAE,eAAe,CAAC;IAI3C,YAAY,CAAC,SAAS,EAAE,iBAAiB,EAAE,GAAG,IAAI;IAOlD,KAAK,IAAI,cAAc;IAYvB,OAAO,CAAC,aAAa;CAkDrB"}
@@ -0,0 +1,98 @@
1
/**
 * Accumulates extracted entities and relations and assembles them into a
 * knowledge graph whose clusters are the connected components formed by the
 * relations that link two known entities.
 */
export class GraphBuilder {
	/** Entities keyed by `entity.name`. */
	entities = new Map();
	/** Every relation added so far (unfiltered copies). */
	relations = [];
	/** Distinct `source_file` values seen on entities and relations. */
	sourceFiles = new Set();
	/** Relations below this confidence are dropped by `build()`. */
	minConfidence;

	/**
	 * @param {{ minConfidence?: number }} [options] - `minConfidence` defaults to 0.
	 */
	constructor(options = {}) {
		this.minConfidence = options.minConfidence ?? 0;
	}

	/**
	 * Adds entities, merging duplicates by name. Aliases of duplicates are
	 * unioned; the remaining fields are taken from whichever duplicate has the
	 * strictly higher confidence (the earlier one wins ties).
	 *
	 * @param {Array<object>} entities - Extracted entities to add.
	 * @returns {void}
	 */
	addEntities(entities) {
		for (const entity of entities) {
			this.sourceFiles.add(entity.source_file);
			const existing = this.entities.get(entity.name);
			if (!existing) {
				// Store a shallow copy so later alias merges never touch the caller's object.
				this.entities.set(entity.name, { ...entity });
				continue;
			}
			const mergedAliases = Array.from(new Set([...(existing.aliases || []), ...(entity.aliases || [])]));
			if (entity.confidence > existing.confidence) {
				this.entities.set(entity.name, { ...entity, aliases: mergedAliases });
			} else {
				existing.aliases = mergedAliases;
			}
		}
	}

	/**
	 * @returns {Map<string, object>} The builder's own entity map (not a copy).
	 */
	getEntities() {
		return this.entities;
	}

	/**
	 * Records shallow copies of the given relations.
	 *
	 * @param {Array<object>} relations - Extracted relations to add.
	 * @returns {void}
	 */
	addRelations(relations) {
		for (const relation of relations) {
			this.sourceFiles.add(relation.source_file);
			this.relations.push({ ...relation });
		}
	}

	/**
	 * Produces a graph snapshot: a copy of the entity map, the relations whose
	 * confidence is at least `minConfidence`, the clusters derived from those
	 * relations, a build timestamp and the number of distinct source files.
	 *
	 * @returns {object} The assembled knowledge graph.
	 */
	build() {
		const filteredRelations = this.relations.filter((r) => r.confidence >= this.minConfidence);
		const clusters = this.buildClusters(filteredRelations);
		return {
			entities: new Map(this.entities),
			relations: filteredRelations,
			clusters,
			built_at: new Date().toISOString(),
			file_count: this.sourceFiles.size,
		};
	}

	/**
	 * Groups entities into connected components using union-find. Only
	 * components with at least two members become clusters. A cluster's
	 * coherence is the mean confidence of its internal relations, rounded to
	 * two decimals.
	 *
	 * @param {Array<object>} relations - Relations already filtered by confidence.
	 * @returns {Array<object>} Clusters in entity insertion order.
	 */
	buildClusters(relations) {
		const parent = new Map();

		// Iterative find with path compression. A recursive version overflows the
		// call stack on long parent chains, which unions without rank can create.
		const find = (start) => {
			if (!parent.has(start)) parent.set(start, start);
			let root = start;
			while (parent.get(root) !== root) root = parent.get(root);
			let node = start;
			while (node !== root) {
				const next = parent.get(node);
				parent.set(node, root);
				node = next;
			}
			return root;
		};
		const union = (a, b) => {
			const rootA = find(a);
			const rootB = find(b);
			if (rootA !== rootB) parent.set(rootB, rootA);
		};

		for (const name of this.entities.keys()) find(name);

		// Relations touching an unknown entity neither join clusters nor count
		// towards coherence.
		const internalRelations = relations.filter(
			(r) => this.entities.has(r.from_entity) && this.entities.has(r.to_entity),
		);
		for (const relation of internalRelations) {
			union(relation.from_entity, relation.to_entity);
		}

		const groups = new Map();
		for (const name of this.entities.keys()) {
			const root = find(name);
			let members = groups.get(root);
			if (!members) {
				members = [];
				groups.set(root, members);
			}
			members.push(name);
		}

		// Both endpoints of an internal relation share a component, so one pass
		// keyed by the component root replaces a per-cluster membership scan.
		const totals = new Map();
		for (const relation of internalRelations) {
			const root = find(relation.from_entity);
			let total = totals.get(root);
			if (!total) {
				total = { sum: 0, count: 0 };
				totals.set(root, total);
			}
			total.sum += relation.confidence;
			total.count += 1;
		}

		const clusters = [];
		let clusterId = 0;
		for (const [root, members] of groups) {
			if (members.length < 2) continue;
			const total = totals.get(root);
			const coherence = total && total.count > 0 ? total.sum / total.count : 0;
			clusters.push({
				id: `cluster-${clusterId++}`,
				name: members[0],
				entities: members,
				coherence_score: Math.round(coherence * 100) / 100,
			});
		}
		return clusters;
	}
}
98
+ //# sourceMappingURL=graph-builder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"graph-builder.js","sourceRoot":"","sources":["../src/graph-builder.ts"],"names":[],"mappings":"AAMA,MAAM,OAAO,YAAY;IAChB,QAAQ,GAAiC,IAAI,GAAG,EAAE,CAAC;IACnD,SAAS,GAAwB,EAAE,CAAC;IACpC,WAAW,GAAgB,IAAI,GAAG,EAAE,CAAC;IACrC,aAAa,CAAS;IAE9B,YAAY,UAA+B,EAAE;QAC5C,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,CAAC,CAAC;IACjD,CAAC;IAED,WAAW,CAAC,QAA2B;QACtC,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;YAC/B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;YACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YAChD,IAAI,QAAQ,EAAE,CAAC;gBACd,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAC/B,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,IAAI,EAAE,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC,CACjE,CAAC;gBACF,IAAI,MAAM,CAAC,UAAU,GAAG,QAAQ,CAAC,UAAU,EAAE,CAAC;oBAC7C,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,GAAG,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,CAAC,CAAC;gBACvE,CAAC;qBAAM,CAAC;oBACP,QAAQ,CAAC,OAAO,GAAG,aAAa,CAAC;gBAClC,CAAC;YACF,CAAC;iBAAM,CAAC;gBACP,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,GAAG,MAAM,EAAE,CAAC,CAAC;YAC/C,CAAC;QACF,CAAC;IACF,CAAC;IAED,WAAW;QACV,OAAO,IAAI,CAAC,QAAQ,CAAC;IACtB,CAAC;IAED,YAAY,CAAC,SAA8B;QAC1C,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YAClC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAC3C,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,GAAG,QAAQ,EAAE,CAAC,CAAC;QACtC,CAAC;IACF,CAAC;IAED,KAAK;QACJ,MAAM,iBAAiB,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3F,MAAM,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,iBAAiB,CAAC,CAAC;QACvD,OAAO;YACN,QAAQ,EAAE,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC;YAChC,SAAS,EAAE,iBAAiB;YAC5B,QAAQ;YACR,QAAQ,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAClC,UAAU,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;SACjC,CAAC;IACH,CAAC;IAEO,aAAa,CAAC,SAA8B;QACnD,MAAM,MAAM,GAAwB,IAAI,GAAG,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,CAAC,CAAS,EAAU,EAAE;YAClC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACrC,kGAAkG;YAClG,IAAI,MAAM,CAAC,GAA
G,CAAC,CAAC,CAAC,KAAK,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC;YAC7D,kGAAkG;YAClG,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC;QACvB,CAAC,CAAC;QACF,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,CAAS,EAAQ,EAAE;YAC5C,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YACtB,IAAI,KAAK,KAAK,KAAK;gBAAE,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC/C,CAAC,CAAC;QAEF,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;YAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QACpD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YAClC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACtF,KAAK,CAAC,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC;YACjD,CAAC;QACF,CAAC;QAED,MAAM,MAAM,GAA0B,IAAI,GAAG,EAAE,CAAC;QAChD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;YACzC,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC;gBAAE,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YAC5C,kGAAkG;YAClG,MAAM,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;QAED,MAAM,QAAQ,GAAoB,EAAE,CAAC;QACrC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,MAAM,CAAC,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;YAClC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;gBAAE,SAAS;YACjC,MAAM,gBAAgB,GAAG,SAAS,CAAC,MAAM,CACxC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CACvE,CAAC;YACF,MAAM,SAAS,GACd,gBAAgB,CAAC,MAAM,GAAG,CAAC;gBAC1B,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,gBAAgB,CAAC,MAAM;gBACtF,CAAC,CAAC,CAAC,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC;gBACb,EAAE,EAAE,WAAW,SAAS,EAAE,EAAE;gBAC5B,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;gBAChB,QAAQ,EAAE,OAAO;gBACjB,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,GAAG,GAAG;aAClD,CAAC,CAAC;QACJ,CAAC;QACD,OAAO,QAAQ,CAAC;IACjB,CAAC;CACD"}
@@ -0,0 +1,21 @@
1
+ import type { ContentFile } from "@sourcepress/core";
2
+ import type { GraphQueryResult, KnowledgeGap, KnowledgeGraph, StaleContent } from "./types.js";
3
+ export interface GraphStats {
4
+ entity_count: number;
5
+ relation_count: number;
6
+ cluster_count: number;
7
+ file_count: number;
8
+ built_at: string;
9
+ }
10
+ export declare class GraphOps {
11
+ private graph;
12
+ private aliasIndex;
13
+ constructor(graph: KnowledgeGraph);
14
+ private buildAliasIndex;
15
+ private resolveEntityName;
16
+ query(nameOrAlias: string): GraphQueryResult | null;
17
+ findGaps(contentFiles: ContentFile[]): KnowledgeGap[];
18
+ findStale(contentFiles: ContentFile[], knowledgeTimestamps: Record<string, string>): StaleContent[];
19
+ getStats(): GraphStats;
20
+ }
21
+ //# sourceMappingURL=graph-ops.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"graph-ops.d.ts","sourceRoot":"","sources":["../src/graph-ops.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAEX,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,YAAY,EACZ,MAAM,YAAY,CAAC;AAEpB,MAAM,WAAW,UAAU;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED,qBAAa,QAAQ;IACpB,OAAO,CAAC,KAAK,CAAiB;IAC9B,OAAO,CAAC,UAAU,CAAsB;gBAE5B,KAAK,EAAE,cAAc;IAKjC,OAAO,CAAC,eAAe;IAWvB,OAAO,CAAC,iBAAiB;IAKzB,KAAK,CAAC,WAAW,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI;IA0BnD,QAAQ,CAAC,YAAY,EAAE,WAAW,EAAE,GAAG,YAAY,EAAE;IAqBrD,SAAS,CACR,YAAY,EAAE,WAAW,EAAE,EAC3B,mBAAmB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACzC,YAAY,EAAE;IA2BjB,QAAQ,IAAI,UAAU;CAStB"}
@@ -0,0 +1,108 @@
1
/**
 * Read-only queries over a built knowledge graph: entity lookup by name or
 * alias, detection of entities with no matching content, detection of content
 * older than its sources, and summary statistics.
 */
export class GraphOps {
	/** The graph being queried. */
	graph;
	/** Lower-cased entity name or alias -> canonical entity name. */
	aliasIndex;

	/**
	 * @param {object} graph - Graph with `entities` (Map), `relations`,
	 *   `clusters`, `file_count` and `built_at`.
	 */
	constructor(graph) {
		this.graph = graph;
		this.aliasIndex = this.buildAliasIndex();
	}

	/**
	 * Builds the case-insensitive lookup table. Canonical entity names take
	 * precedence over aliases, so an alias that merely collides with another
	 * entity's name can never hide that entity.
	 *
	 * @returns {Map<string, string>} Lower-cased key -> canonical name.
	 */
	buildAliasIndex() {
		const index = new Map();
		const canonicalKeys = new Set();
		for (const name of this.graph.entities.keys()) {
			const key = name.toLowerCase();
			canonicalKeys.add(key);
			index.set(key, name);
		}
		for (const [name, entity] of this.graph.entities) {
			for (const alias of entity.aliases || []) {
				const key = alias.toLowerCase();
				if (!canonicalKeys.has(key)) index.set(key, name);
			}
		}
		return index;
	}

	/**
	 * @param {string} nameOrAlias - Exact entity name, or any-case name/alias.
	 * @returns {string | null} Canonical entity name, or null when unknown.
	 */
	resolveEntityName(nameOrAlias) {
		if (this.graph.entities.has(nameOrAlias)) return nameOrAlias;
		return this.aliasIndex.get(nameOrAlias.toLowerCase()) ?? null;
	}

	/**
	 * Looks up an entity together with its relations, the entities on the other
	 * end of those relations, and the files that contributed to either.
	 *
	 * @param {string} nameOrAlias - Entity name or alias.
	 * @returns {object | null} `{ entity, relations, related_entities, files }`
	 *   or null when the entity is unknown.
	 */
	query(nameOrAlias) {
		const resolvedName = this.resolveEntityName(nameOrAlias);
		if (!resolvedName) return null;
		const entity = this.graph.entities.get(resolvedName);
		if (!entity) return null;

		const relations = this.graph.relations.filter(
			(r) => r.from_entity === resolvedName || r.to_entity === resolvedName,
		);
		const relatedNames = new Set();
		const files = new Set([entity.source_file]);
		for (const r of relations) {
			if (r.from_entity === resolvedName) relatedNames.add(r.to_entity);
			if (r.to_entity === resolvedName) relatedNames.add(r.from_entity);
			files.add(r.source_file);
		}
		const related_entities = [];
		for (const name of relatedNames) {
			const related = this.graph.entities.get(name);
			if (related) related_entities.push(related);
		}
		return { entity, relations, related_entities, files: Array.from(files) };
	}

	/**
	 * Reports every entity whose name appears (case-insensitively) in neither
	 * the body nor the frontmatter title of any content file.
	 *
	 * @param {Array<object>} contentFiles - Files with `body` and `frontmatter`.
	 * @returns {Array<object>} One gap record per unmentioned entity.
	 */
	findGaps(contentFiles) {
		// Lower-case each file once instead of once per entity.
		const haystacks = contentFiles.map((c) => ({
			body: String(c.body ?? "").toLowerCase(),
			title: String(c.frontmatter?.title ?? "").toLowerCase(),
		}));
		const gaps = [];
		for (const [name, entity] of this.graph.entities) {
			const needle = name.toLowerCase();
			const mentioned = haystacks.some((h) => h.body.includes(needle) || h.title.includes(needle));
			if (mentioned) continue;
			gaps.push({
				entity_name: name,
				entity_type: entity.type,
				knowledge_file_count: 1,
				content_file_count: 0,
				reason: `Entity "${name}" (${entity.type}) exists in knowledge but has no corresponding content`,
			});
		}
		return gaps;
	}

	/**
	 * Reports content whose provenance lists at least one source file modified
	 * after the content was generated. Content without `provenance.generated_at`
	 * or `provenance.source_files` is ignored.
	 *
	 * @param {Array<object>} contentFiles - Files with optional `provenance`.
	 * @param {Record<string, string>} knowledgeTimestamps - Source path -> last
	 *   modification timestamp.
	 * @returns {Array<object>} One record per stale content file.
	 */
	findStale(contentFiles, knowledgeTimestamps) {
		// Compare as instants: plain string comparison misorders timestamps that
		// differ in fractional-second precision or UTC offset. Unparseable values
		// fall back to string comparison.
		const isLater = (a, b) => {
			const timeA = Date.parse(a);
			const timeB = Date.parse(b);
			if (Number.isNaN(timeA) || Number.isNaN(timeB)) return a > b;
			return timeA > timeB;
		};

		const stale = [];
		for (const content of contentFiles) {
			if (!content.provenance?.generated_at || !content.provenance?.source_files) continue;
			const generatedAt = content.provenance.generated_at;
			const staleSources = [];
			let newestSourceChange = "";
			for (const sourcePath of content.provenance.source_files) {
				const sourceTimestamp = knowledgeTimestamps[sourcePath];
				if (!sourceTimestamp || !isLater(sourceTimestamp, generatedAt)) continue;
				staleSources.push(sourcePath);
				if (newestSourceChange === "" || isLater(sourceTimestamp, newestSourceChange)) {
					newestSourceChange = sourceTimestamp;
				}
			}
			if (staleSources.length > 0) {
				stale.push({
					content_path: content.path,
					generated_at: generatedAt,
					newest_source_change: newestSourceChange,
					stale_sources: staleSources,
					reason: `Content generated at ${generatedAt} but source(s) updated at ${newestSourceChange}`,
				});
			}
		}
		return stale;
	}

	/**
	 * @returns {object} Entity, relation, cluster and file counts plus the
	 *   graph's build timestamp.
	 */
	getStats() {
		return {
			entity_count: this.graph.entities.size,
			relation_count: this.graph.relations.length,
			cluster_count: this.graph.clusters.length,
			file_count: this.graph.file_count,
			built_at: this.graph.built_at,
		};
	}
}
108
+ //# sourceMappingURL=graph-ops.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"graph-ops.js","sourceRoot":"","sources":["../src/graph-ops.ts"],"names":[],"mappings":"AAiBA,MAAM,OAAO,QAAQ;IACZ,KAAK,CAAiB;IACtB,UAAU,CAAsB;IAExC,YAAY,KAAqB;QAChC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;IAC1C,CAAC;IAEO,eAAe;QACtB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;QACxC,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;YAClD,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,CAAC;YACpC,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC;gBAC1C,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,EAAE,IAAI,CAAC,CAAC;YACtC,CAAC;QACF,CAAC;QACD,OAAO,KAAK,CAAC;IACd,CAAC;IAEO,iBAAiB,CAAC,WAAmB;QAC5C,IAAI,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC;YAAE,OAAO,WAAW,CAAC;QAC7D,OAAO,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC;IAC/D,CAAC;IAED,KAAK,CAAC,WAAmB;QACxB,MAAM,YAAY,GAAG,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;QACzD,IAAI,CAAC,YAAY;YAAE,OAAO,IAAI,CAAC;QAC/B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QACrD,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC;QAEzB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAC5C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,KAAK,YAAY,IAAI,CAAC,CAAC,SAAS,KAAK,YAAY,CACrE,CAAC;QACF,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;QACvC,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;YAC3B,IAAI,CAAC,CAAC,WAAW,KAAK,YAAY;gBAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;YAClE,IAAI,CAAC,CAAC,SAAS,KAAK,YAAY;gBAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QACnE,CAAC;QACD,MAAM,gBAAgB,GAAsB,EAAE,CAAC;QAC/C,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;YACjC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACxC,IAAI,CAAC;gBAAE,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACjC,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;QAC9B,KAAK,MAAM,CAAC,IAAI,SAAS;YAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QAEpD,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;I
AC1E,CAAC;IAED,QAAQ,CAAC,YAA2B;QACnC,MAAM,IAAI,GAAmB,EAAE,CAAC;QAChC,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;YAClD,MAAM,YAAY,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;gBAC9C,MAAM,SAAS,GAAG,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;gBACvC,MAAM,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBACnE,OAAO,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;YAC1F,CAAC,CAAC,CAAC,MAAM,CAAC;YACV,IAAI,YAAY,KAAK,CAAC,EAAE,CAAC;gBACxB,IAAI,CAAC,IAAI,CAAC;oBACT,WAAW,EAAE,IAAI;oBACjB,WAAW,EAAE,MAAM,CAAC,IAAI;oBACxB,oBAAoB,EAAE,CAAC;oBACvB,kBAAkB,EAAE,CAAC;oBACrB,MAAM,EAAE,WAAW,IAAI,MAAM,MAAM,CAAC,IAAI,wDAAwD;iBAChG,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;QACD,OAAO,IAAI,CAAC;IACb,CAAC;IAED,SAAS,CACR,YAA2B,EAC3B,mBAA2C;QAE3C,MAAM,KAAK,GAAmB,EAAE,CAAC;QACjC,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;YACpC,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,YAAY,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,YAAY;gBAAE,SAAS;YACrF,MAAM,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC;YACpD,MAAM,YAAY,GAAa,EAAE,CAAC;YAClC,IAAI,kBAAkB,GAAG,EAAE,CAAC;YAC5B,KAAK,MAAM,UAAU,IAAI,OAAO,CAAC,UAAU,CAAC,YAAY,EAAE,CAAC;gBAC1D,MAAM,eAAe,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;gBACxD,IAAI,eAAe,IAAI,eAAe,GAAG,WAAW,EAAE,CAAC;oBACtD,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;oBAC9B,IAAI,eAAe,GAAG,kBAAkB;wBAAE,kBAAkB,GAAG,eAAe,CAAC;gBAChF,CAAC;YACF,CAAC;YACD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC7B,KAAK,CAAC,IAAI,CAAC;oBACV,YAAY,EAAE,OAAO,CAAC,IAAI;oBAC1B,YAAY,EAAE,WAAW;oBACzB,oBAAoB,EAAE,kBAAkB;oBACxC,aAAa,EAAE,YAAY;oBAC3B,MAAM,EAAE,wBAAwB,WAAW,6BAA6B,kBAAkB,EAAE;iBAC5F,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;QACD,OAAO,KAAK,CAAC;IACd,CAAC;IAED,QAAQ;QACP,OAAO;YACN,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI;YACtC,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM;YAC3C,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM;YACzC,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU;YACjC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ;SAC7B,CAAC;IACH,CAAC;CACD"}
@@ -0,0 +1,10 @@
1
+ export { InMemoryKnowledgeStore } from "./knowledge-store.js";
2
+ export { JsonFileStore } from "./json-file-store.js";
3
+ export { KnowledgeEngine } from "./knowledge-engine.js";
4
+ export { GraphBuilder, type GraphBuilderOptions } from "./graph-builder.js";
5
+ export { GraphOps, type GraphStats } from "./graph-ops.js";
6
+ export type { ExtractionResult, ExtractedEntity, ExtractedRelation, EntityCluster, KnowledgeGraph, GraphQueryResult, KnowledgeGap, StaleContent, KnowledgeStoreBackend, KnowledgeFileFilter, } from "./types.js";
7
+ export { Scraper } from "./ingestion/scraper.js";
8
+ export { SitemapParser } from "./ingestion/sitemap-parser.js";
9
+ export type { Fetcher, ScrapeResult, SitemapSection, SitemapResult, SitemapRunOptions, BatchProgress, } from "./ingestion/types.js";
10
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,sBAAsB,CAAC;AAC9D,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,YAAY,EAAE,KAAK,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC5E,OAAO,EAAE,QAAQ,EAAE,KAAK,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC3D,YAAY,EACX,gBAAgB,EAChB,eAAe,EACf,iBAAiB,EACjB,aAAa,EACb,cAAc,EACd,gBAAgB,EAChB,YAAY,EACZ,YAAY,EACZ,qBAAqB,EACrB,mBAAmB,GACnB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC;AAC9D,YAAY,EACX,OAAO,EACP,YAAY,EACZ,cAAc,EACd,aAAa,EACb,iBAAiB,EACjB,aAAa,GACb,MAAM,sBAAsB,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,8 @@
1
+ export { InMemoryKnowledgeStore } from "./knowledge-store.js";
2
+ export { JsonFileStore } from "./json-file-store.js";
3
+ export { KnowledgeEngine } from "./knowledge-engine.js";
4
+ export { GraphBuilder } from "./graph-builder.js";
5
+ export { GraphOps } from "./graph-ops.js";
6
+ export { Scraper } from "./ingestion/scraper.js";
7
+ export { SitemapParser } from "./ingestion/sitemap-parser.js";
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,MAAM,sBAAsB,CAAC;AAC9D,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,YAAY,EAA4B,MAAM,oBAAoB,CAAC;AAC5E,OAAO,EAAE,QAAQ,EAAmB,MAAM,gBAAgB,CAAC;AAa3D,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC"}
@@ -0,0 +1,4 @@
1
+ export { Scraper } from "./scraper.js";
2
+ export { SitemapParser } from "./sitemap-parser.js";
3
+ export type { Fetcher, ScrapeResult, SitemapSection, SitemapResult, SitemapRunOptions, BatchProgress, } from "./types.js";
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/ingestion/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,YAAY,EACX,OAAO,EACP,YAAY,EACZ,cAAc,EACd,aAAa,EACb,iBAAiB,EACjB,aAAa,GACb,MAAM,YAAY,CAAC"}
@@ -0,0 +1,3 @@
1
+ export { Scraper } from "./scraper.js";
2
+ export { SitemapParser } from "./sitemap-parser.js";
3
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/ingestion/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC"}
@@ -0,0 +1,22 @@
1
+ import type { Fetcher, ScrapeResult } from "./types.js";
2
+ /**
3
+ * Validate a URL against SSRF risks.
4
+ * Throws if the URL targets a private/loopback address or non-http(s) scheme.
5
+ */
6
+ export declare function validateUrl(rawUrl: string): URL;
7
+ /**
8
+ * Scrapes a URL into readable content.
9
+ * Uses linkedom for DOM parsing (works in all runtimes) + Mozilla Readability for extraction.
10
+ */
11
+ export declare class Scraper {
12
+ private fetcher;
13
+ constructor(fetcher?: Fetcher);
14
+ scrape(url: string): Promise<ScrapeResult>;
15
+ extractFromHtml(url: string, html: string): ScrapeResult;
16
+ /**
17
+ * Lightweight HTML to markdown conversion.
18
+ * Handles common elements: headings, paragraphs, links, lists, bold, italic, code.
19
+ */
20
+ private htmlToMarkdown;
21
+ }
22
+ //# sourceMappingURL=scraper.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scraper.d.ts","sourceRoot":"","sources":["../../src/ingestion/scraper.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAaxD;;;GAGG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,MAAM,GAAG,GAAG,CA6B/C;AAED;;;GAGG;AACH,qBAAa,OAAO;IACnB,OAAO,CAAC,OAAO,CAAU;gBAEb,OAAO,CAAC,EAAE,OAAO;IAIvB,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAYhD,eAAe,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,YAAY;IAyBxD;;;OAGG;IACH,OAAO,CAAC,cAAc;CAkCtB"}
@@ -0,0 +1,118 @@
1
+ import { Readability } from "@mozilla/readability";
2
+ import { parseHTML } from "linkedom";
3
/**
 * Patterns for private, loopback and link-local address literals.
 * They are tested against the normalized host produced by `normalizeHost`
 * (lower-cased, IPv6 brackets removed, IPv4-mapped IPv6 rewritten as IPv4).
 */
const PRIVATE_IP_PATTERNS = [
  /^127\./, // IPv4 loopback
  /^10\./, // RFC 1918
  /^192\.168\./, // RFC 1918
  /^172\.(1[6-9]|2\d|3[01])\./, // RFC 1918 (172.16.0.0/12)
  /^169\.254\./, // IPv4 link-local
  /^::1$/, // IPv6 loopback
  /^f[cd][0-9a-f]{2}:/i, // IPv6 unique local, fc00::/7
  /^fe[89ab][0-9a-f]:/i, // IPv6 link-local, fe80::/10
];

/**
 * Reduce a `URL.hostname` value to the form the patterns above expect.
 *
 * `URL.hostname` keeps the square brackets around IPv6 literals and serializes
 * IPv4-mapped addresses in hex (`[::ffff:7f00:1]`), so both are undone here.
 *
 * @param {string} hostname - Value of `URL.hostname`.
 * @returns {string} Lower-cased host without brackets or a trailing dot.
 */
function normalizeHost(hostname) {
  let host = hostname.toLowerCase();
  if (host.startsWith("[") && host.endsWith("]")) {
    host = host.slice(1, -1);
  }
  if (host.endsWith(".")) {
    host = host.slice(0, -1);
  }
  const mapped = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(host);
  if (mapped) {
    const high = Number.parseInt(mapped[1], 16);
    const low = Number.parseInt(mapped[2], 16);
    host = [high >> 8, high & 0xff, low >> 8, low & 0xff].join(".");
  }
  return host;
}

/**
 * Validate a URL against SSRF risks.
 *
 * Only the URL text is inspected: the scheme must be http or https and the
 * host must not be a private, loopback, link-local or unspecified address
 * literal, nor a `localhost` name. No DNS lookup is performed, so a public
 * name that resolves to a private address is not detected here.
 *
 * @param {string} rawUrl - URL to check.
 * @returns {URL} The parsed URL when every check passes.
 * @throws {Error} If the URL cannot be parsed, uses another scheme, or targets
 *   a blocked host.
 */
export function validateUrl(rawUrl) {
  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch {
    throw new Error(`Invalid URL: ${rawUrl}`);
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error(
      `URL scheme "${parsed.protocol}" is not allowed. Only http and https are permitted.`,
    );
  }

  // Keep the hostname as the URL parser reports it for error messages; match on
  // the normalized form so bracketed IPv6 literals are actually checked.
  const hostname = parsed.hostname.toLowerCase();
  const host = normalizeHost(hostname);

  if (PRIVATE_IP_PATTERNS.some((pattern) => pattern.test(host))) {
    throw new Error(
      `URL hostname "${hostname}" resolves to a private or loopback address, which is not allowed.`,
    );
  }

  // Block "localhost" (including subdomains) and the unspecified addresses explicitly.
  const isLocalName = host === "localhost" || host.endsWith(".localhost");
  if (isLocalName || host === "0.0.0.0" || host === "::") {
    throw new Error(`URL hostname "${hostname}" is not allowed.`);
  }

  return parsed;
}
40
/**
 * Scrapes a URL into readable content.
 * Uses linkedom for DOM parsing + Mozilla Readability for extraction, then
 * converts the extracted article HTML to markdown with a small regex pipeline.
 */
export class Scraper {
  /** Fetch function used by `scrape`; called as `fetcher(url, init)`. */
  fetcher;

  /**
   * @param {Function} [fetcher] - Fetch-compatible function. Defaults to the
   *   global `fetch`, bound to `globalThis`.
   */
  constructor(fetcher) {
    this.fetcher = fetcher ?? globalThis.fetch.bind(globalThis);
  }

  /**
   * Fetch `url` and extract its readable content.
   *
   * Only `url` itself is passed through `validateUrl` before the request.
   *
   * @param {string} url - Page to fetch.
   * @returns {Promise<object>} Result of `extractFromHtml` for the response body.
   * @throws {Error} If validation fails, the response is not `ok`, or no
   *   readable content is found.
   */
  async scrape(url) {
    validateUrl(url);
    const response = await this.fetcher(url, undefined);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
    }
    const html = await response.text();
    return this.extractFromHtml(url, html);
  }

  /**
   * Extract the readable article from an HTML string.
   *
   * @param {string} url - Source URL; echoed in the result and used for the
   *   title fallback (its pathname) when the article has no title.
   * @param {string} html - Raw HTML document.
   * @returns {object} `{ url, title, content, markdown, byline, excerpt, length, scraped_at }`.
   * @throws {Error} If Readability yields nothing or fewer than 50 characters
   *   of trimmed text.
   */
  extractFromHtml(url, html) {
    const { document } = parseHTML(html);
    const article = new Readability(document).parse();
    const text = article?.textContent?.trim() ?? "";
    if (!article || text.length < 50) {
      throw new Error(`No readable content found at ${url}`);
    }
    const markdown = this.htmlToMarkdown(article.content);
    return {
      url,
      // `||` on purpose: an empty title should also fall back to the path.
      title: article.title || new URL(url).pathname,
      content: article.textContent,
      markdown,
      byline: article.byline || undefined,
      excerpt: article.excerpt || undefined,
      length: article.textContent.length,
      scraped_at: new Date().toISOString(),
    };
  }

  /**
   * Lightweight HTML to markdown conversion.
   * Handles common elements: headings (h1-h4), paragraphs, links, lists, bold,
   * italic, inline code and preformatted blocks. Any other tag is dropped and
   * its text kept.
   *
   * @param {string} html - HTML fragment.
   * @returns {string} Markdown text, trimmed, with runs of blank lines collapsed.
   */
  htmlToMarkdown(html) {
    const entities = {
      amp: "&",
      lt: "<",
      gt: ">",
      quot: '"',
      "#39": "'",
      nbsp: " ",
    };
    let md = html;

    // Preformatted blocks go first so an inner <code> wrapper is unwrapped here
    // instead of leaving inline back-ticks inside the fence.
    md = md.replace(
      /<pre(?:\s[^>]*)?>(?:<code(?:\s[^>]*)?>)?(.*?)(?:<\/code>)?<\/pre>/gis,
      "```\n$1\n```\n\n",
    );

    // Headings
    for (let level = 1; level <= 4; level += 1) {
      const heading = new RegExp(`<h${level}(?:\\s[^>]*)?>(.*?)<\\/h${level}>`, "gis");
      md = md.replace(heading, `${"#".repeat(level)} $1\n\n`);
    }

    // Bold and italic. `(?:\s[^>]*)?` accepts attributes without also matching
    // longer tag names such as <br> or <img>.
    md = md.replace(/<(strong|b)(?:\s[^>]*)?>(.*?)<\/\1>/gis, "**$2**");
    md = md.replace(/<(em|i)(?:\s[^>]*)?>(.*?)<\/\1>/gis, "*$2*");

    // Inline code
    md = md.replace(/<code(?:\s[^>]*)?>(.*?)<\/code>/gis, "`$1`");

    // Links (double- or single-quoted href)
    md = md.replace(/<a\s[^>]*?href=(["'])(.*?)\1[^>]*>(.*?)<\/a>/gis, "[$3]($2)");

    // Lists
    md = md.replace(/<li(?:\s[^>]*)?>(.*?)<\/li>/gis, "- $1\n");
    md = md.replace(/<\/?[uo]l[^>]*>/gi, "\n");

    // Paragraphs and line breaks
    md = md.replace(/<br\s*\/?>/gi, "\n");
    md = md.replace(/<p(?:\s[^>]*)?>(.*?)<\/p>/gis, "$1\n\n");

    // Strip remaining HTML tags
    md = md.replace(/<[^>]+>/g, "");

    // Decode basic HTML entities in a single pass so "&amp;lt;" becomes the
    // literal text "&lt;" rather than being decoded twice into "<".
    md = md.replace(/&(amp|lt|gt|quot|#39|nbsp);/g, (_match, name) => entities[name]);

    // Clean up whitespace
    return md.replace(/\n{3,}/g, "\n\n").trim();
  }
}
118
+ //# sourceMappingURL=scraper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scraper.js","sourceRoot":"","sources":["../../src/ingestion/scraper.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAGrC,MAAM,mBAAmB,GAAG;IAC3B,QAAQ;IACR,OAAO;IACP,aAAa;IACb,4BAA4B;IAC5B,aAAa;IACb,OAAO;IACP,SAAS;IACT,SAAS;CACT,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,MAAc;IACzC,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACJ,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IAC1B,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,gBAAgB,MAAM,EAAE,CAAC,CAAC;IAC3C,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,KAAK,OAAO,IAAI,MAAM,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QACjE,MAAM,IAAI,KAAK,CACd,eAAe,MAAM,CAAC,QAAQ,sDAAsD,CACpF,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAC/C,KAAK,MAAM,OAAO,IAAI,mBAAmB,EAAE,CAAC;QAC3C,IAAI,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CACd,iBAAiB,QAAQ,oEAAoE,CAC7F,CAAC;QACH,CAAC;IACF,CAAC;IAED,+BAA+B;IAC/B,IAAI,QAAQ,KAAK,WAAW,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;QACxD,MAAM,IAAI,KAAK,CAAC,iBAAiB,QAAQ,mBAAmB,CAAC,CAAC;IAC/D,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,OAAO,OAAO;IACX,OAAO,CAAU;IAEzB,YAAY,OAAiB;QAC5B,IAAI,CAAC,OAAO,GAAG,OAAO,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC7D,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAW;QACvB,WAAW,CAAC,GAAG,CAAC,CAAC;QAEjB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACpD,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,KAAK,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;QACtF,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,OAAO,IAAI,CAAC,eAAe,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;IACxC,CAAC;IAED,eAAe,CAAC,GAAW,EAAE,IAAY;QACxC,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,6FAA6F;QAC7F,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAe,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAE/B,MAAM,IAAI,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,gCAAgC,GAAG,EAAE,CAAC,CAAC;QACx
D,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEtD,OAAO;YACN,GAAG;YACH,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ;YAC7C,OAAO,EAAE,OAAO,CAAC,WAAW;YAC5B,QAAQ;YACR,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;YACnC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,SAAS;YACrC,MAAM,EAAE,OAAO,CAAC,WAAW,CAAC,MAAM;YAClC,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,cAAc,CAAC,IAAY;QAClC,IAAI,EAAE,GAAG,IAAI,CAAC;QACd,WAAW;QACX,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,wBAAwB,EAAE,UAAU,CAAC,CAAC;QACtD,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,wBAAwB,EAAE,WAAW,CAAC,CAAC;QACvD,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,wBAAwB,EAAE,YAAY,CAAC,CAAC;QACxD,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,wBAAwB,EAAE,aAAa,CAAC,CAAC;QACzD,kBAAkB;QAClB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,2BAA2B,EAAE,QAAQ,CAAC,CAAC;QACvD,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,uBAAuB,EAAE,MAAM,CAAC,CAAC;QACjD,OAAO;QACP,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,uBAAuB,EAAE,MAAM,CAAC,CAAC;QACjD,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,2BAA2B,EAAE,kBAAkB,CAAC,CAAC;QACjE,QAAQ;QACR,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,yCAAyC,EAAE,UAAU,CAAC,CAAC;QACvE,QAAQ;QACR,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,wBAAwB,EAAE,QAAQ,CAAC,CAAC;QACpD,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC,CAAC;QAC3C,6BAA6B;QAC7B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;QACtC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,uBAAuB,EAAE,QAAQ,CAAC,CAAC;QACnD,4BAA4B;QAC5B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAChC,6BAA6B;QAC7B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAC/B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAC9B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QAC9B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAChC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAC/B,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAChC,sBAAsB;QACtB,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1C,OAAO,EAAE,CAAC;IACX,CAAC;CACD"}
@@ -0,0 +1,32 @@
1
+ import type { Fetcher, SitemapResult, SitemapRunOptions } from "./types.js";
2
+ /**
3
+ * Parses XML sitemaps and groups URLs by path pattern for interactive selection.
4
+ */
5
+ export declare class SitemapParser {
6
+ private fetcher;
7
+ constructor(fetcher?: Fetcher);
8
+ /**
9
+ * Fetch and parse a sitemap, returning URLs grouped by path section.
10
+ */
11
+ parse(sitemapUrl: string): Promise<SitemapResult>;
12
+ /**
13
+ * Filter URLs based on include/exclude patterns.
14
+ * Patterns use simple glob matching: "/blog/*" matches "/blog/anything".
15
+ */
16
+ filterUrls(result: SitemapResult, options: SitemapRunOptions): string[];
17
+ /**
18
+ * Extract <loc> URLs from sitemap XML.
19
+ * Handles both regular sitemaps and sitemap indexes.
20
+ */
21
+ private extractUrls;
22
+ /**
23
+ * Group URLs by their first path segment to create browsable sections.
24
+ */
25
+ private groupByPattern;
26
+ /**
27
+ * Simple glob pattern matching.
28
+ * Supports trailing wildcard: "/blog/*" matches "/blog/my-post".
29
+ */
30
+ private matchPattern;
31
+ }
32
+ //# sourceMappingURL=sitemap-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sitemap-parser.d.ts","sourceRoot":"","sources":["../../src/ingestion/sitemap-parser.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAkB,MAAM,YAAY,CAAC;AAE5F;;GAEG;AACH,qBAAa,aAAa;IACzB,OAAO,CAAC,OAAO,CAAU;gBAEb,OAAO,CAAC,EAAE,OAAO;IAI7B;;OAEG;IACG,KAAK,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IAuBvD;;;OAGG;IACH,UAAU,CAAC,MAAM,EAAE,aAAa,EAAE,OAAO,EAAE,iBAAiB,GAAG,MAAM,EAAE;IAoBvE;;;OAGG;IACH,OAAO,CAAC,WAAW;IAUnB;;OAEG;IACH,OAAO,CAAC,cAAc;IA2BtB;;;OAGG;IACH,OAAO,CAAC,YAAY;CAOpB"}
@@ -0,0 +1,104 @@
1
+ import { validateUrl } from "./scraper.js";
2
/**
 * Parses XML sitemaps and groups URLs by path pattern for interactive selection.
 */
export class SitemapParser {
  /** Fetch function used by `parse`; called as `fetcher(url)`. */
  fetcher;

  /**
   * @param {Function} [fetcher] - Fetch-compatible function. Defaults to the
   *   global `fetch`, bound to `globalThis`.
   */
  constructor(fetcher) {
    this.fetcher = fetcher ?? globalThis.fetch.bind(globalThis);
  }

  /**
   * Fetch and parse a sitemap, returning URLs grouped by path section.
   *
   * @param {string} sitemapUrl - Sitemap location; checked with `validateUrl` first.
   * @returns {Promise<object>} `{ sitemap_url, sections, total_urls }`, where
   *   `total_urls` counts every extracted `<loc>` value.
   * @throws {Error} If validation fails, the response is not `ok`, or the
   *   document contains no `<loc>` values.
   */
  async parse(sitemapUrl) {
    validateUrl(sitemapUrl);
    const response = await this.fetcher(sitemapUrl);
    if (!response.ok) {
      throw new Error(`Failed to fetch sitemap ${sitemapUrl}: ${response.status}`);
    }
    const xml = await response.text();
    const urls = this.extractUrls(xml);
    if (urls.length === 0) {
      throw new Error(`No URLs found in sitemap ${sitemapUrl}`);
    }
    return {
      sitemap_url: sitemapUrl,
      sections: this.groupByPattern(urls),
      total_urls: urls.length,
    };
  }

  /**
   * Filter URLs based on include/exclude patterns.
   * Patterns use simple glob matching: "/blog/*" matches "/blog/anything".
   * A URL is kept when it matches at least one include pattern (if any are
   * given) and no exclude pattern.
   *
   * @param {object} result - Value shaped like the return of `parse`.
   * @param {object} options - `{ include?: string[], exclude?: string[] }`.
   * @returns {string[]} Matching URLs in their original order.
   */
  filterUrls(result, options) {
    const include = options.include ?? [];
    const exclude = options.exclude ?? [];
    const urls = result.sections.flatMap((section) => section.urls);
    if (include.length === 0 && exclude.length === 0) {
      return urls;
    }
    return urls.filter((url) => {
      const path = new URL(url).pathname;
      const matches = (pattern) => this.matchPattern(path, pattern);
      if (include.length > 0 && !include.some(matches)) {
        return false;
      }
      return !exclude.some(matches);
    });
  }

  /**
   * Extract <loc> URLs from sitemap XML.
   *
   * Every `<loc>` element is read, so the entries of a sitemap index are
   * returned too; the child sitemaps they point at are not fetched here.
   * Values are XML-unescaped (`&amp;` becomes `&`) and a CDATA wrapper is
   * removed, because sitemap URLs are stored entity-escaped.
   *
   * @param {string} xml - Sitemap document.
   * @returns {string[]} Non-empty `<loc>` values in document order.
   */
  extractUrls(xml) {
    const urls = [];
    const locRegex = /<loc(?:\s[^>]*)?>([\s\S]*?)<\/loc>/gi;
    for (const match of xml.matchAll(locRegex)) {
      const raw = match[1].trim();
      const cdata = /^<!\[CDATA\[([\s\S]*?)\]\]>$/.exec(raw);
      // CDATA content is literal text; only plain character data is unescaped.
      const url = cdata ? cdata[1].trim() : this.decodeXmlEntities(raw);
      if (url) {
        urls.push(url);
      }
    }
    return urls;
  }

  /**
   * Replace the five predefined XML entities and numeric character references
   * in a single pass (so "&amp;lt;" yields "&lt;", not "<").
   *
   * @param {string} text - Escaped character data.
   * @returns {string} Unescaped text; unknown or out-of-range references are left as-is.
   */
  decodeXmlEntities(text) {
    const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
    return text.replace(
      /&(amp|lt|gt|quot|apos|#\d+|#[xX][0-9a-fA-F]+);/g,
      (whole, ref) => {
        if (!ref.startsWith("#")) {
          return named[ref];
        }
        const isHex = ref[1] === "x" || ref[1] === "X";
        const code = isHex
          ? Number.parseInt(ref.slice(2), 16)
          : Number.parseInt(ref.slice(1), 10);
        return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
      },
    );
  }

  /**
   * Group URLs by their first path segment to create browsable sections.
   *
   * @param {string[]} urls - Absolute URLs; entries `new URL` cannot parse are skipped.
   * @returns {object[]} `{ name, pattern, urls, count }` per section, largest first.
   */
  groupByPattern(urls) {
    const groups = new Map();
    for (const url of urls) {
      let pathname;
      try {
        pathname = new URL(url).pathname;
      } catch {
        // Skip invalid URLs
        continue;
      }
      const [first] = pathname.split("/").filter(Boolean);
      const section = first === undefined ? "/" : `/${first}`;
      const bucket = groups.get(section);
      if (bucket) {
        bucket.push(url);
      } else {
        groups.set(section, [url]);
      }
    }
    return Array.from(groups.entries())
      .map(([pattern, sectionUrls]) => ({
        // "/blog" -> "Blog"; the root group gets a fixed label.
        name: pattern === "/" ? "Root" : pattern.charAt(1).toUpperCase() + pattern.slice(2),
        pattern: `${pattern}/*`,
        urls: sectionUrls,
        count: sectionUrls.length,
      }))
      .sort((a, b) => b.count - a.count);
  }

  /**
   * Simple glob pattern matching.
   * Supports trailing wildcard: "/blog/*" matches "/blog" and "/blog/my-post".
   * Any other pattern must equal the path exactly.
   *
   * @param {string} path - URL pathname.
   * @param {string} pattern - Exact path or a prefix ending in "/*".
   * @returns {boolean} Whether the path matches.
   */
  matchPattern(path, pattern) {
    if (!pattern.endsWith("/*")) {
      return path === pattern;
    }
    const prefix = pattern.slice(0, -2);
    return path === prefix || path.startsWith(`${prefix}/`);
  }
}
104
+ //# sourceMappingURL=sitemap-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sitemap-parser.js","sourceRoot":"","sources":["../../src/ingestion/sitemap-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAG3C;;GAEG;AACH,MAAM,OAAO,aAAa;IACjB,OAAO,CAAU;IAEzB,YAAY,OAAiB;QAC5B,IAAI,CAAC,OAAO,GAAG,OAAO,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,UAAkB;QAC7B,WAAW,CAAC,UAAU,CAAC,CAAC;QACxB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QAChD,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,2BAA2B,UAAU,KAAK,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAEnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,4BAA4B,UAAU,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAE3C,OAAO;YACN,WAAW,EAAE,UAAU;YACvB,QAAQ;YACR,UAAU,EAAE,IAAI,CAAC,MAAM;SACvB,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,UAAU,CAAC,MAAqB,EAAE,OAA0B;QAC3D,IAAI,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAElD,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnD,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;gBAC1B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;gBACnC,OAAO,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;YAC7E,CAAC,CAAC,CAAC;QACJ,CAAC;QAED,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnD,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;gBAC1B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;gBACnC,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;YAC9E,CAAC,CAAC,CAAC;QACJ,CAAC;QAED,OAAO,IAAI,CAAC;IACb,CAAC;IAED;;;OAGG;IACK,WAAW,CAAC,GAAW;QAC9B,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,MAAM,QAAQ,GAAG,2BAA2B,CAAC;QAC7C,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5C,MAAM,GAAG,GAAG,K
AAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC5B,IAAI,GAAG;gBAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACzB,CAAC;QACD,OAAO,IAAI,CAAC;IACb,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAc;QACpC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;QAE3C,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACxB,IAAI,CAAC;gBACJ,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;gBACvC,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBACrD,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;gBAC9D,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;gBAC3C,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACnB,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YAC/B,CAAC;YAAC,MAAM,CAAC;gBACR,oBAAoB;YACrB,CAAC;QACF,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;aACjC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,WAAW,CAAC,EAAE,EAAE,CAAC,CAAC;YACjC,IAAI,EACH,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;YACvF,OAAO,EAAE,GAAG,OAAO,IAAI;YACvB,IAAI,EAAE,WAAW;YACjB,KAAK,EAAE,WAAW,CAAC,MAAM;SACzB,CAAC,CAAC;aACF,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACrC,CAAC;IAED;;;OAGG;IACK,YAAY,CAAC,IAAY,EAAE,OAAe;QACjD,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACpC,OAAO,IAAI,KAAK,MAAM,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC;QACzD,CAAC;QACD,OAAO,IAAI,KAAK,OAAO,CAAC;IACzB,CAAC;CACD"}