@knowledgine/core 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/config-loader.d.ts.map +1 -1
- package/dist/config/config-loader.js +23 -8
- package/dist/config/config-loader.js.map +1 -1
- package/dist/embedding/onnx-embedding-provider.d.ts +1 -0
- package/dist/embedding/onnx-embedding-provider.d.ts.map +1 -1
- package/dist/embedding/onnx-embedding-provider.js +9 -3
- package/dist/embedding/onnx-embedding-provider.js.map +1 -1
- package/dist/feedback/feedback-learner.d.ts.map +1 -1
- package/dist/feedback/feedback-learner.js +3 -6
- package/dist/feedback/feedback-learner.js.map +1 -1
- package/dist/graph/entity-extractor.d.ts +17 -3
- package/dist/graph/entity-extractor.d.ts.map +1 -1
- package/dist/graph/entity-extractor.js +273 -20
- package/dist/graph/entity-extractor.js.map +1 -1
- package/dist/graph/graph-repository.d.ts +1 -0
- package/dist/graph/graph-repository.d.ts.map +1 -1
- package/dist/graph/graph-repository.js +7 -0
- package/dist/graph/graph-repository.js.map +1 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -0
- package/dist/index.js.map +1 -1
- package/dist/processing/file-processor.d.ts.map +1 -1
- package/dist/processing/file-processor.js +14 -3
- package/dist/processing/file-processor.js.map +1 -1
- package/dist/search/hybrid-searcher.d.ts.map +1 -1
- package/dist/search/hybrid-searcher.js +5 -1
- package/dist/search/hybrid-searcher.js.map +1 -1
- package/dist/search/knowledge-searcher.d.ts +2 -2
- package/dist/search/knowledge-searcher.d.ts.map +1 -1
- package/dist/search/knowledge-searcher.js +33 -15
- package/dist/search/knowledge-searcher.js.map +1 -1
- package/dist/search/link-generator.d.ts +1 -0
- package/dist/search/link-generator.d.ts.map +1 -1
- package/dist/search/link-generator.js +44 -11
- package/dist/search/link-generator.js.map +1 -1
- package/dist/search/query-orchestrator.d.ts +2 -2
- package/dist/search/query-orchestrator.d.ts.map +1 -1
- package/dist/search/query-orchestrator.js +43 -23
- package/dist/search/query-orchestrator.js.map +1 -1
- package/dist/search/reasoning-reranker.d.ts +4 -4
- package/dist/search/reasoning-reranker.d.ts.map +1 -1
- package/dist/search/reasoning-reranker.js +9 -5
- package/dist/search/reasoning-reranker.js.map +1 -1
- package/dist/search/semantic-searcher.d.ts +2 -2
- package/dist/search/semantic-searcher.d.ts.map +1 -1
- package/dist/search/semantic-searcher.js +5 -1
- package/dist/search/semantic-searcher.js.map +1 -1
- package/dist/services/knowledge-service.d.ts +1 -0
- package/dist/services/knowledge-service.d.ts.map +1 -1
- package/dist/services/knowledge-service.js +12 -1
- package/dist/services/knowledge-service.js.map +1 -1
- package/dist/storage/database.d.ts.map +1 -1
- package/dist/storage/database.js +5 -0
- package/dist/storage/database.js.map +1 -1
- package/dist/storage/knowledge-repository.d.ts +42 -0
- package/dist/storage/knowledge-repository.d.ts.map +1 -1
- package/dist/storage/knowledge-repository.js +278 -145
- package/dist/storage/knowledge-repository.js.map +1 -1
- package/dist/storage/migrations/011_fts_unicode61.d.ts +10 -0
- package/dist/storage/migrations/011_fts_unicode61.d.ts.map +1 -0
- package/dist/storage/migrations/011_fts_unicode61.js +88 -0
- package/dist/storage/migrations/011_fts_unicode61.js.map +1 -0
- package/dist/storage/migrations/012_fts_trigram_cjk.d.ts +10 -0
- package/dist/storage/migrations/012_fts_trigram_cjk.d.ts.map +1 -0
- package/dist/storage/migrations/012_fts_trigram_cjk.js +53 -0
- package/dist/storage/migrations/012_fts_trigram_cjk.js.map +1 -0
- package/dist/storage/schema.d.ts +1 -1
- package/dist/storage/schema.d.ts.map +1 -1
- package/dist/storage/schema.js +1 -1
- package/dist/utils/atomic-write.d.ts +2 -0
- package/dist/utils/atomic-write.d.ts.map +1 -0
- package/dist/utils/atomic-write.js +20 -0
- package/dist/utils/atomic-write.js.map +1 -0
- package/dist/utils/semantic-readiness.d.ts +13 -0
- package/dist/utils/semantic-readiness.d.ts.map +1 -0
- package/dist/utils/semantic-readiness.js +21 -0
- package/dist/utils/semantic-readiness.js.map +1 -0
- package/package.json +3 -3
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config-loader.d.ts","sourceRoot":"","sources":["../../src/config/config-loader.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"config-loader.d.ts","sourceRoot":"","sources":["../../src/config/config-loader.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAGtD,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QAAC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAA;KAAE,CAAC;IAC9D,MAAM,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,SAAS,GAAG,UAAU,GAAG,QAAQ,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IACpF,KAAK,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAChD,GAAG,CAAC,EAAE,OAAO,iBAAiB,EAAE,SAAS,CAAC;IAC1C,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAQD;;;GAGG;AACH,wBAAgB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,iBAAiB,CAgB9D;AAoCD;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAW3D;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,GAAG,IAAI,CAarE"}
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
-
import { readFileSync
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
2
|
import { resolve } from "path";
|
|
3
3
|
import { parse as parseYaml } from "yaml";
|
|
4
4
|
import { defineConfig } from "../config.js";
|
|
5
|
+
import { writeTextFileAtomically } from "../utils/atomic-write.js";
|
|
6
|
+
function hasErrnoCode(error, code) {
|
|
7
|
+
return (error instanceof Error && "code" in error && error.code === code);
|
|
8
|
+
}
|
|
5
9
|
/**
|
|
6
10
|
* Load knowledgine configuration from RC file and environment variables.
|
|
7
11
|
* Priority: env var > RC file > defaults (embedding.enabled = false)
|
|
@@ -26,12 +30,22 @@ function loadRcFile(startDir) {
|
|
|
26
30
|
const jsonPath = resolve(dir, ".knowledginerc.json");
|
|
27
31
|
const ymlPath = resolve(dir, ".knowledginerc.yml");
|
|
28
32
|
try {
|
|
29
|
-
|
|
33
|
+
try {
|
|
30
34
|
return JSON.parse(readFileSync(jsonPath, "utf-8"));
|
|
31
35
|
}
|
|
32
|
-
|
|
36
|
+
catch (error) {
|
|
37
|
+
if (!hasErrnoCode(error, "ENOENT")) {
|
|
38
|
+
throw error;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
try {
|
|
33
42
|
return parseYaml(readFileSync(ymlPath, "utf-8"));
|
|
34
43
|
}
|
|
44
|
+
catch (error) {
|
|
45
|
+
if (!hasErrnoCode(error, "ENOENT")) {
|
|
46
|
+
throw error;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
35
49
|
}
|
|
36
50
|
catch (error) {
|
|
37
51
|
console.error(`Warning: Failed to parse config file: ${error instanceof Error ? error.message : String(error)}`);
|
|
@@ -68,14 +82,15 @@ export function writeRcConfig(dirPath, config) {
|
|
|
68
82
|
const rcPath = resolve(dirPath, ".knowledginerc.json");
|
|
69
83
|
let existing = {};
|
|
70
84
|
try {
|
|
71
|
-
|
|
72
|
-
existing = JSON.parse(readFileSync(rcPath, "utf-8"));
|
|
73
|
-
}
|
|
85
|
+
existing = JSON.parse(readFileSync(rcPath, "utf-8"));
|
|
74
86
|
}
|
|
75
|
-
catch {
|
|
87
|
+
catch (error) {
|
|
88
|
+
if (hasErrnoCode(error, "ENOENT")) {
|
|
89
|
+
existing = {};
|
|
90
|
+
}
|
|
76
91
|
// If existing file is invalid, overwrite it
|
|
77
92
|
}
|
|
78
93
|
const merged = { ...existing, ...config };
|
|
79
|
-
|
|
94
|
+
writeTextFileAtomically(rcPath, JSON.stringify(merged, null, 2) + "\n");
|
|
80
95
|
}
|
|
81
96
|
//# sourceMappingURL=config-loader.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config-loader.js","sourceRoot":"","sources":["../../src/config/config-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"config-loader.js","sourceRoot":"","sources":["../../src/config/config-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAC/B,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AAYnE,SAAS,YAAY,CAAC,KAAc,EAAE,IAAY;IAChD,OAAO,CACL,KAAK,YAAY,KAAK,IAAI,MAAM,IAAI,KAAK,IAAK,KAA+B,CAAC,IAAI,KAAK,IAAI,CAC5F,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,UAAU,CAAC,QAAgB;IACzC,MAAM,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IAEtC,gCAAgC;IAChC,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IACxD,MAAM,eAAe,GACnB,WAAW,KAAK,MAAM,IAAI,WAAW,KAAK,GAAG,IAAI,QAAQ,EAAE,QAAQ,KAAK,IAAI,CAAC;IAE/E,OAAO,YAAY,CAAC;QAClB,QAAQ;QACR,SAAS,EAAE;YACT,SAAS,EAAE,kBAAkB;YAC7B,UAAU,EAAE,GAAG;YACf,OAAO,EAAE,eAAe;SACzB;KACF,CAAC,CAAC;AACL,CAAC;AAED,SAAS,UAAU,CAAC,QAAgB;IAClC,IAAI,GAAG,GAAG,QAAQ,CAAC;IACnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,EAAE,qBAAqB,CAAC,CAAC;QACrD,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,EAAE,oBAAoB,CAAC,CAAC;QACnD,IAAI,CAAC;YACH,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAa,CAAC;YACjE,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE,CAAC;oBACnC,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;YAED,IAAI,CAAC;gBACH,OAAO,SAAS,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAa,CAAC;YAC/D,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE,CAAC;oBACnC,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CACX,yCAAyC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAClG,CAAC;YACF,OAAO,IAAI,CAAC;QACd,CAAC;QACD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAClC,IAAI,MAAM,KAAK,GAAG;YAAE,MAAM,CAAC,SAAS;QACpC,GAAG,GAAG,MAAM,CAAC;IACf,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAgB;IACjD,IAAI,OAAO;QAAE,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC;IAErC,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAChD,IAAI,OAAO;QAAE,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC;IAErC,oFAAoF;IACpF,MAAM,QAAQ,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IAC3C,IAAI,QAAQ,EAAE,WAAW;QAAE,OAAO,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAEhE,OAAO,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,OAAe,EAAE,MAAgB;IAC7D,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAC;IACvD,IAAI,QAAQ,GAAa,EAAE,CAAC;IAC5B,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,CAAa,CAAC;IACnE,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,YAAY,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE,CAAC;YAClC,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;QACD,4CAA4C;IAC9C,CAAC;IACD,MAAM,MAAM,GAAG,EAAE,GAAG,QAAQ,EAAE,GAAG,MAAM,EAAE,CAAC;IAC1C,uBAAuB,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;AAC1E,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-embedding-provider.d.ts","sourceRoot":"","sources":["../../src/embedding/onnx-embedding-provider.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAEjE,OAAO,EAAE,YAAY,EAAsB,MAAM,oBAAoB,CAAC;AAKtE,qBAAa,qBAAsB,YAAW,iBAAiB;IAC7D,OAAO,CAAC,OAAO,CAAiC;IAChD,OAAO,CAAC,SAAS,CAAmC;IACpD,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAe;
|
|
1
|
+
{"version":3,"file":"onnx-embedding-provider.d.ts","sourceRoot":"","sources":["../../src/embedding/onnx-embedding-provider.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAEjE,OAAO,EAAE,YAAY,EAAsB,MAAM,oBAAoB,CAAC;AAKtE,qBAAa,qBAAsB,YAAW,iBAAiB;IAC7D,OAAO,CAAC,OAAO,CAAiC;IAChD,OAAO,CAAC,SAAS,CAAmC;IACpD,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,GAAG,CAAkD;gBAEjD,SAAS,GAAE,MAA2B,EAAE,YAAY,CAAC,EAAE,YAAY;YAKjE,UAAU;IA0BxB,OAAO,CAAC,YAAY;IAWd,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAK1C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAyD1D,aAAa,IAAI,MAAM;IAIjB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAO5B,OAAO,CAAC,QAAQ;IA0BhB,OAAO,CAAC,SAAS;CAalB"}
|
|
@@ -7,6 +7,7 @@ export class OnnxEmbeddingProvider {
|
|
|
7
7
|
tokenizer = null;
|
|
8
8
|
modelName;
|
|
9
9
|
modelManager;
|
|
10
|
+
ort = null;
|
|
10
11
|
constructor(modelName = DEFAULT_MODEL_NAME, modelManager) {
|
|
11
12
|
this.modelName = modelName;
|
|
12
13
|
this.modelManager = modelManager ?? new ModelManager();
|
|
@@ -18,8 +19,10 @@ export class OnnxEmbeddingProvider {
|
|
|
18
19
|
throw new EmbeddingNotAvailableError(`Model "${this.modelName}" not found. Run 'knowledgine init' to download the model automatically.`);
|
|
19
20
|
}
|
|
20
21
|
try {
|
|
21
|
-
|
|
22
|
-
|
|
22
|
+
if (!this.ort) {
|
|
23
|
+
this.ort = await import("onnxruntime-node");
|
|
24
|
+
}
|
|
25
|
+
this.session = await this.ort.InferenceSession.create(this.modelManager.getModelPath(this.modelName), { executionProviders: ["cpu"] });
|
|
23
26
|
return this.session;
|
|
24
27
|
}
|
|
25
28
|
catch (error) {
|
|
@@ -43,7 +46,10 @@ export class OnnxEmbeddingProvider {
|
|
|
43
46
|
const session = await this.getSession();
|
|
44
47
|
const tokenizer = this.getTokenizer();
|
|
45
48
|
try {
|
|
46
|
-
|
|
49
|
+
if (!this.ort) {
|
|
50
|
+
this.ort = await import("onnxruntime-node");
|
|
51
|
+
}
|
|
52
|
+
const ort = this.ort;
|
|
47
53
|
const results = [];
|
|
48
54
|
for (const text of texts) {
|
|
49
55
|
const encoded = tokenizer.encode(text);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-embedding-provider.js","sourceRoot":"","sources":["../../src/embedding/onnx-embedding-provider.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACtE,OAAO,EAAE,0BAA0B,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE1E,MAAM,UAAU,GAAG,GAAG,CAAC;AAEvB,MAAM,OAAO,qBAAqB;IACxB,OAAO,GAA4B,IAAI,CAAC;IACxC,SAAS,GAA8B,IAAI,CAAC;IAC5C,SAAS,CAAS;IAClB,YAAY,CAAe;
|
|
1
|
+
{"version":3,"file":"onnx-embedding-provider.js","sourceRoot":"","sources":["../../src/embedding/onnx-embedding-provider.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACtE,OAAO,EAAE,0BAA0B,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE1E,MAAM,UAAU,GAAG,GAAG,CAAC;AAEvB,MAAM,OAAO,qBAAqB;IACxB,OAAO,GAA4B,IAAI,CAAC;IACxC,SAAS,GAA8B,IAAI,CAAC;IAC5C,SAAS,CAAS;IAClB,YAAY,CAAe;IAC3B,GAAG,GAA6C,IAAI,CAAC;IAE7D,YAAY,YAAoB,kBAAkB,EAAE,YAA2B;QAC7E,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,YAAY,IAAI,IAAI,YAAY,EAAE,CAAC;IACzD,CAAC;IAEO,KAAK,CAAC,UAAU;QACtB,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC,OAAO,CAAC;QAEtC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YACxD,MAAM,IAAI,0BAA0B,CAClC,UAAU,IAAI,CAAC,SAAS,0EAA0E,CACnG,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACd,IAAI,CAAC,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;YAC9C,CAAC;YACD,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,MAAM,CACnD,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,EAC9C,EAAE,kBAAkB,EAAE,CAAC,KAAK,CAAC,EAAE,CAChC,CAAC;YACF,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,cAAc,CACtB,8BAA8B,IAAI,CAAC,SAAS,GAAG,EAC/C,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAC1D,CAAC;QACJ,CAAC;IACH,CAAC;IAEO,YAAY;QAClB,IAAI,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC,SAAS,CAAC;QAC1C,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YACxD,MAAM,IAAI,0BAA0B,CAClC,UAAU,IAAI,CAAC,SAAS,0EAA0E,CACnG,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,SAAS,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;QAC5F,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,MAAM,CAAC,MAAM,CAAC,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/C,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QACxC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAEtC,IAAI,CAAC;YACH,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACd,IAAI,CAAC,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;YAC9C,CAAC;YACD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC;YACrB,MAAM,OAAO,GAAmB,EAAE,CAAC;YAEnC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;gBACvC,MAAM,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAEvC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE;oBACzF,CAAC;oBACD,MAAM;iBACP,CAAC,CAAC;gBACH,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,MAAM,CAClC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EACrD,CAAC,CAAC,EAAE,MAAM,CAAC,CACZ,CAAC;gBACF,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,MAAM,CACjC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EACpD,CAAC,CAAC,EAAE,MAAM,CAAC,CACZ,CAAC;gBAEF,MAAM,KAAK,GAA2B;oBACpC,SAAS,EAAE,QAAQ;oBACnB,cAAc,EAAE,aAAa;oBAC7B,cAAc,EAAE,YAAY;iBAC7B,CAAC;gBACF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAExC,yDAAyD;gBACzD,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,MAAM,eAAe,GAAG,MAAM,CAAC,mBAAmB,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;gBACzE,MAAM,IAAI,GAAG,eAAe,CAAC,IAAoB,CAAC;gBAClD,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;gBACjF,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC;YAC1C,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,0BAA0B,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;gBACnF,MAAM,KAAK,CAAC;YACd,CAAC;YACD,MAAM,IAAI,cAAc,CACtB,uBAAuB,EACvB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAC1D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,aAAa;QACX,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;IAEO,QAAQ,CACd,IAAkB,EAClB,aAAuB,EACvB,MAAc,EACd,UAAkB;QAElB,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,UAAU,CAAC,CAAC;QAC5C,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,IAAI,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;gBAAE,SAAS;YACrC,KAAK,EAAE,CAAC;YACR,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC;YACxC,CAAC;QACH,CAAC;QAED,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,MAAM,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC;YACrB,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,SAAS,CAAC,GAAiB;QACjC,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YACpB,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC;QAChB,CAAC;QACD,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,IAAI,IAAI,KAAK,CAAC;YAAE,OAAO,GAAG,CAAC;QAC3B,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;QAC5B,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"feedback-learner.d.ts","sourceRoot":"","sources":["../../src/feedback/feedback-learner.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"feedback-learner.d.ts","sourceRoot":"","sources":["../../src/feedback/feedback-learner.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAG9D,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE;QAAE,KAAK,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAC/B,aAAa,EAAE,YAAY,EAAE,CAAC;IAC9B,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,eAAe,EAAE,cAAc,EAAE,CAAC;CACnC;AAaD,qBAAa,eAAe;IAExB,OAAO,CAAC,kBAAkB;IAC1B,OAAO,CAAC,SAAS;gBADT,kBAAkB,EAAE,kBAAkB,EACtC,SAAS,EAAE,MAAM;IAG3B,aAAa,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAyDvC,SAAS,IAAI,eAAe;IAS5B,OAAO,CAAC,SAAS;CAGlB"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { readFileSync
|
|
2
|
-
import { join, dirname } from "path";
|
|
1
|
+
import { readFileSync } from "fs";
|
|
3
2
|
import { FeedbackRepository } from "./feedback-repository.js";
|
|
3
|
+
import { writeTextFileAtomically } from "../utils/atomic-write.js";
|
|
4
4
|
function createEmptyRules() {
|
|
5
5
|
return {
|
|
6
6
|
version: 1,
|
|
@@ -79,10 +79,7 @@ export class FeedbackLearner {
|
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
saveRules(rules) {
|
|
82
|
-
|
|
83
|
-
const tmpPath = join(dir, `.extraction-rules.tmp.${process.pid}.json`);
|
|
84
|
-
writeFileSync(tmpPath, JSON.stringify(rules, null, 2), "utf-8");
|
|
85
|
-
renameSync(tmpPath, this.rulesPath);
|
|
82
|
+
writeTextFileAtomically(this.rulesPath, JSON.stringify(rules, null, 2));
|
|
86
83
|
}
|
|
87
84
|
}
|
|
88
85
|
//# sourceMappingURL=feedback-learner.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"feedback-learner.js","sourceRoot":"","sources":["../../src/feedback/feedback-learner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"feedback-learner.js","sourceRoot":"","sources":["../../src/feedback/feedback-learner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AAsBnE,SAAS,gBAAgB;IACvB,OAAO;QACL,OAAO,EAAE,CAAC;QACV,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,SAAS,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE;QACxB,aAAa,EAAE,EAAE;QACjB,eAAe,EAAE,EAAE;QACnB,eAAe,EAAE,EAAE;KACpB,CAAC;AACJ,CAAC;AAED,MAAM,OAAO,eAAe;IAEhB;IACA;IAFV,YACU,kBAAsC,EACtC,SAAiB;QADjB,uBAAkB,GAAlB,kBAAkB,CAAoB;QACtC,cAAS,GAAT,SAAS,CAAQ;IACxB,CAAC;IAEJ,aAAa,CAAC,UAAkB;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QAC7D,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,iCAAiC,UAAU,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAE/B,QAAQ,QAAQ,CAAC,SAAS,EAAE,CAAC;YAC3B,KAAK,gBAAgB;gBACnB,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,QAAQ,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;oBACzD,KAAK,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;gBAClD,CAAC;gBACD,MAAM;YAER,KAAK,YAAY;gBACf,IAAI,CAAC,QAAQ,CAAC,UAAU,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;oBAClD,MAAM,IAAI,KAAK,CAAC,8DAA8D,CAAC,CAAC;gBAClF,CAAC;gBACD,mDAAmD;gBACnD,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,UAAU,CAAC,CAAC;gBACzF,MAAM,QAAQ,GAAiB;oBAC7B,IAAI,EAAE,QAAQ,CAAC,UAAU;oBACzB,QAAQ,EAAE,QAAQ,CAAC,UAAU;oBAC7B,MAAM,EAAE,QAAQ,CAAC,WAAW;iBAC7B,CAAC;gBACF,IAAI,WAAW,IAAI,CAAC,EAAE,CAAC;oBACrB,KAAK,CAAC,aAAa,CAAC,WAAW,CAAC,GAAG,QAAQ,CAAC;gBAC9C,CAAC;qBAAM,CAAC;oBACN,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACrC,CAAC;gBACD,MAAM;YAER,KAAK,eAAe,CAAC,CAAC,CAAC;gBACrB,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,IAAI,QAAQ,CAAC,UAAU,IAAI,YAAY,CAAC;gBAC/E,0DAA0D;gBAC1D,MAAM,aAAa,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,CACnD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,UAAU,CACtC,CAAC;gBACF,MAAM,KAAK,GAAmB;oBAC5B,IAAI,EAAE,QAAQ,CAAC,UAAU;oBACzB,IAAI,EAAE,UAAU;iBACjB,CAAC;gBACF,IAAI,aAAa,IAAI,CAAC,EAAE,CAAC;oBACvB,KAAK,CAAC,eAAe,CAAC,aAAa,CAAC,GAAG,KAAK,CAAC;gBAC/C,CAAC;qBAAM,CAAC;oBACN,KAAK,CAAC,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACpC,CAAC;gBACD,MAAM;YACR,CAAC;QACH,CAAC;QAED,KAAK,CAAC,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC3C,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACtB,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;IAC9D,CAAC;IAED,SAAS;QACP,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;YACtD,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAoB,CAAC;QAChD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,gBAAgB,EAAE,CAAC;QAC5B,CAAC;IACH,CAAC;IAEO,SAAS,CAAC,KAAsB;QACtC,uBAAuB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC1E,CAAC;CACF"}
|
|
@@ -5,9 +5,6 @@ export interface ExtractedEntity {
|
|
|
5
5
|
entityType: EntityType;
|
|
6
6
|
sourceType: "tag" | "import" | "link" | "code" | "mention" | "frontmatter" | "whitelist";
|
|
7
7
|
}
|
|
8
|
-
/**
|
|
9
|
-
* Extracts entities from note text, with optional post-processing via ExtractionRules.
|
|
10
|
-
*/
|
|
11
8
|
export declare class EntityExtractor {
|
|
12
9
|
private rules?;
|
|
13
10
|
private static readonly MIME_PREFIXES;
|
|
@@ -18,11 +15,28 @@ export declare class EntityExtractor {
|
|
|
18
15
|
private extractFromFrontmatterTags;
|
|
19
16
|
private extractFromFrontmatterFields;
|
|
20
17
|
private extractImports;
|
|
18
|
+
private extractImportSpecifier;
|
|
19
|
+
private findStandaloneRequireIndex;
|
|
20
|
+
private findRequireCallOpenParenIndex;
|
|
21
|
+
private findQuoteIndex;
|
|
22
|
+
private readQuotedValue;
|
|
21
23
|
private extractMarkdownLinks;
|
|
22
24
|
private extractInlineCode;
|
|
23
25
|
private extractMentions;
|
|
26
|
+
private classifyMention;
|
|
24
27
|
private extractOrgRepos;
|
|
28
|
+
/**
|
|
29
|
+
* Strip URLs from content, replacing them with whitespace to prevent
|
|
30
|
+
* URL path fragments from being matched as org/repo.
|
|
31
|
+
*/
|
|
32
|
+
private stripUrls;
|
|
33
|
+
/**
|
|
34
|
+
* Extract org/repo from GitHub URLs (e.g., github.com/org/repo/...).
|
|
35
|
+
* Only extracts the org/repo portion, not deeper path segments.
|
|
36
|
+
*/
|
|
37
|
+
private extractOrgReposFromUrls;
|
|
25
38
|
private isStopWord;
|
|
39
|
+
private isIdentifierChar;
|
|
26
40
|
private resolveEntityTypes;
|
|
27
41
|
private deduplicate;
|
|
28
42
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"entity-extractor.d.ts","sourceRoot":"","sources":["../../src/graph/entity-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAGvE,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,UAAU,CAAC;IACvB,UAAU,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,aAAa,GAAG,WAAW,CAAC;CAC1F;
|
|
1
|
+
{"version":3,"file":"entity-extractor.d.ts","sourceRoot":"","sources":["../../src/graph/entity-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAGvE,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,UAAU,CAAC;IACvB,UAAU,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,aAAa,GAAG,WAAW,CAAC;CAC1F;AAyMD,qBAAa,eAAe;IAC1B,OAAO,CAAC,KAAK,CAAC,CAAkB;IAEhC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,aAAa,CAUlC;IAEH,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,aAAa,CAkClC;gBAES,KAAK,CAAC,EAAE,eAAe;IAInC,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,WAAW,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GAAG,eAAe,EAAE;IA2BtF,OAAO,CAAC,UAAU;IAiClB,OAAO,CAAC,0BAA0B;IAclC,OAAO,CAAC,4BAA4B;IAsCpC,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,sBAAsB;IA0B9B,OAAO,CAAC,0BAA0B;IAyBlC,OAAO,CAAC,6BAA6B;IAcrC,OAAO,CAAC,cAAc;IAStB,OAAO,CAAC,eAAe;IAOvB,OAAO,CAAC,oBAAoB;IAmB5B,OAAO,CAAC,iBAAiB;IAezB,OAAO,CAAC,eAAe;IAcvB,OAAO,CAAC,eAAe;IAUvB,OAAO,CAAC,eAAe;IAsBvB;;;OAGG;IACH,OAAO,CAAC,SAAS;IAKjB;;;OAGG;IACH,OAAO,CAAC,uBAAuB;IAgB/B,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,gBAAgB;IAUxB,OAAO,CAAC,kBAAkB;IAc1B,OAAO,CAAC,WAAW;CAUpB"}
|
|
@@ -73,6 +73,127 @@ const STOP_LIST = new Set([
|
|
|
73
73
|
/**
|
|
74
74
|
* Extracts entities from note text, with optional post-processing via ExtractionRules.
|
|
75
75
|
*/
|
|
76
|
+
/**
|
|
77
|
+
* Known technology/tool names that should never be classified as person.
|
|
78
|
+
* Maps to preferred entity type.
|
|
79
|
+
*/
|
|
80
|
+
const TECH_DICTIONARY = new Map([
|
|
81
|
+
// Databases
|
|
82
|
+
["redis", "technology"],
|
|
83
|
+
["postgresql", "technology"],
|
|
84
|
+
["postgres", "technology"],
|
|
85
|
+
["mongodb", "technology"],
|
|
86
|
+
["mysql", "technology"],
|
|
87
|
+
["sqlite", "technology"],
|
|
88
|
+
["elasticsearch", "technology"],
|
|
89
|
+
["dynamodb", "technology"],
|
|
90
|
+
["cassandra", "technology"],
|
|
91
|
+
// Containers & orchestration
|
|
92
|
+
["docker", "technology"],
|
|
93
|
+
["kubernetes", "technology"],
|
|
94
|
+
["podman", "technology"],
|
|
95
|
+
// Web servers & proxies
|
|
96
|
+
["nginx", "technology"],
|
|
97
|
+
["apache", "technology"],
|
|
98
|
+
["caddy", "technology"],
|
|
99
|
+
["traefik", "technology"],
|
|
100
|
+
// Languages & runtimes
|
|
101
|
+
["python", "technology"],
|
|
102
|
+
["golang", "technology"],
|
|
103
|
+
["rust", "technology"],
|
|
104
|
+
["swift", "technology"],
|
|
105
|
+
["kotlin", "technology"],
|
|
106
|
+
["deno", "technology"],
|
|
107
|
+
["bun", "technology"],
|
|
108
|
+
// Frameworks & libraries
|
|
109
|
+
["react", "technology"],
|
|
110
|
+
["angular", "technology"],
|
|
111
|
+
["vue", "technology"],
|
|
112
|
+
["svelte", "technology"],
|
|
113
|
+
["django", "technology"],
|
|
114
|
+
["flask", "technology"],
|
|
115
|
+
["express", "technology"],
|
|
116
|
+
["fastapi", "technology"],
|
|
117
|
+
["nextjs", "technology"],
|
|
118
|
+
["nuxt", "technology"],
|
|
119
|
+
// Tools
|
|
120
|
+
["webpack", "tool"],
|
|
121
|
+
["vite", "tool"],
|
|
122
|
+
["eslint", "tool"],
|
|
123
|
+
["prettier", "tool"],
|
|
124
|
+
["babel", "tool"],
|
|
125
|
+
["terraform", "tool"],
|
|
126
|
+
["ansible", "tool"],
|
|
127
|
+
["grafana", "tool"],
|
|
128
|
+
["prometheus", "tool"],
|
|
129
|
+
// Cloud & services
|
|
130
|
+
["vercel", "technology"],
|
|
131
|
+
["netlify", "technology"],
|
|
132
|
+
["heroku", "technology"],
|
|
133
|
+
["supabase", "technology"],
|
|
134
|
+
["firebase", "technology"],
|
|
135
|
+
]);
|
|
136
|
+
/**
|
|
137
|
+
* Programming concepts that should not be classified as person.
|
|
138
|
+
*/
|
|
139
|
+
const NOT_PERSON_LIST = new Set([
|
|
140
|
+
"sandbox",
|
|
141
|
+
"middleware",
|
|
142
|
+
"proxy",
|
|
143
|
+
"daemon",
|
|
144
|
+
"worker",
|
|
145
|
+
"handler",
|
|
146
|
+
"listener",
|
|
147
|
+
"observer",
|
|
148
|
+
"adapter",
|
|
149
|
+
"bridge",
|
|
150
|
+
"factory",
|
|
151
|
+
"builder",
|
|
152
|
+
"parser",
|
|
153
|
+
"compiler",
|
|
154
|
+
"runtime",
|
|
155
|
+
"kernel",
|
|
156
|
+
"scheduler",
|
|
157
|
+
"dispatcher",
|
|
158
|
+
"controller",
|
|
159
|
+
"resolver",
|
|
160
|
+
"navigator",
|
|
161
|
+
"provider",
|
|
162
|
+
"consumer",
|
|
163
|
+
"producer",
|
|
164
|
+
"generator",
|
|
165
|
+
"iterator",
|
|
166
|
+
"executor",
|
|
167
|
+
"loader",
|
|
168
|
+
"renderer",
|
|
169
|
+
"emitter",
|
|
170
|
+
"pipeline",
|
|
171
|
+
"gateway",
|
|
172
|
+
"registry",
|
|
173
|
+
"container",
|
|
174
|
+
"server",
|
|
175
|
+
"client",
|
|
176
|
+
"agent",
|
|
177
|
+
"bot",
|
|
178
|
+
"cli",
|
|
179
|
+
"api",
|
|
180
|
+
"sdk",
|
|
181
|
+
"env",
|
|
182
|
+
"config",
|
|
183
|
+
"admin",
|
|
184
|
+
"debug",
|
|
185
|
+
"staging",
|
|
186
|
+
"production",
|
|
187
|
+
"development",
|
|
188
|
+
"localhost",
|
|
189
|
+
"webhook",
|
|
190
|
+
"cron",
|
|
191
|
+
"cache",
|
|
192
|
+
"queue",
|
|
193
|
+
"stack",
|
|
194
|
+
"heap",
|
|
195
|
+
"buffer",
|
|
196
|
+
]);
|
|
76
197
|
export class EntityExtractor {
|
|
77
198
|
rules;
|
|
78
199
|
static MIME_PREFIXES = new Set([
|
|
@@ -99,6 +220,27 @@ export class EntityExtractor {
|
|
|
99
220
|
"public",
|
|
100
221
|
"assets",
|
|
101
222
|
"static",
|
|
223
|
+
"com",
|
|
224
|
+
"org",
|
|
225
|
+
"net",
|
|
226
|
+
"io",
|
|
227
|
+
"dev",
|
|
228
|
+
"tools",
|
|
229
|
+
"blob",
|
|
230
|
+
"tree",
|
|
231
|
+
"main",
|
|
232
|
+
"master",
|
|
233
|
+
"profile",
|
|
234
|
+
"api",
|
|
235
|
+
"raw",
|
|
236
|
+
"releases",
|
|
237
|
+
"actions",
|
|
238
|
+
"settings",
|
|
239
|
+
"examples",
|
|
240
|
+
"wiki",
|
|
241
|
+
"issues",
|
|
242
|
+
"pulls",
|
|
243
|
+
"packages",
|
|
102
244
|
]);
|
|
103
245
|
constructor(rules) {
|
|
104
246
|
this.rules = rules;
|
|
@@ -202,28 +344,87 @@ export class EntityExtractor {
|
|
|
202
344
|
}
|
|
203
345
|
extractImports(content) {
|
|
204
346
|
const results = [];
|
|
205
|
-
// import X from 'package' / import { X } from 'package' / require('package')
|
|
206
|
-
// Use separate patterns to avoid polynomial backtracking on user-controlled input
|
|
207
|
-
const fromRe = /\bfrom\s+['"]([^'"./][^'"]*)['"]/g;
|
|
208
|
-
const requireRe = /\brequire\s*\(\s*['"]([^'"./][^'"]*)['"]\s*\)/g;
|
|
209
347
|
const seen = new Set();
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
if (!pkg || this.isStopWord(pkg) || seen.has(pkg))
|
|
348
|
+
for (const rawLine of content.split(/\r?\n/)) {
|
|
349
|
+
const specifier = this.extractImportSpecifier(rawLine.trim());
|
|
350
|
+
if (!specifier || specifier.startsWith(".") || specifier.startsWith("/"))
|
|
214
351
|
continue;
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
if (!pkg || this.isStopWord(pkg))
|
|
352
|
+
const pkg = specifier.startsWith("@")
|
|
353
|
+
? specifier.split("/").slice(0, 2).join("/")
|
|
354
|
+
: specifier.split("/")[0];
|
|
355
|
+
const normalized = pkg.toLowerCase();
|
|
356
|
+
if (!pkg || this.isStopWord(normalized) || seen.has(normalized))
|
|
221
357
|
continue;
|
|
222
|
-
|
|
223
|
-
results.push({ name:
|
|
358
|
+
seen.add(normalized);
|
|
359
|
+
results.push({ name: normalized, entityType: "technology", sourceType: "import" });
|
|
224
360
|
}
|
|
225
361
|
return results;
|
|
226
362
|
}
|
|
363
|
+
extractImportSpecifier(line) {
|
|
364
|
+
if (!line)
|
|
365
|
+
return null;
|
|
366
|
+
if (line.startsWith("import ")) {
|
|
367
|
+
const fromIndex = line.lastIndexOf(" from ");
|
|
368
|
+
const quoteIndex = fromIndex >= 0
|
|
369
|
+
? this.findQuoteIndex(line, fromIndex + " from ".length)
|
|
370
|
+
: this.findQuoteIndex(line, "import ".length);
|
|
371
|
+
return quoteIndex >= 0 ? this.readQuotedValue(line, quoteIndex) : null;
|
|
372
|
+
}
|
|
373
|
+
const requireIndex = this.findStandaloneRequireIndex(line);
|
|
374
|
+
if (requireIndex >= 0) {
|
|
375
|
+
const openParenIndex = this.findRequireCallOpenParenIndex(line, requireIndex + "require".length);
|
|
376
|
+
if (openParenIndex < 0)
|
|
377
|
+
return null;
|
|
378
|
+
const quoteIndex = this.findQuoteIndex(line, openParenIndex + 1);
|
|
379
|
+
return quoteIndex >= 0 ? this.readQuotedValue(line, quoteIndex) : null;
|
|
380
|
+
}
|
|
381
|
+
return null;
|
|
382
|
+
}
|
|
383
|
+
findStandaloneRequireIndex(line) {
|
|
384
|
+
let searchIndex = 0;
|
|
385
|
+
while (searchIndex < line.length) {
|
|
386
|
+
const requireIndex = line.indexOf("require", searchIndex);
|
|
387
|
+
if (requireIndex < 0)
|
|
388
|
+
return -1;
|
|
389
|
+
const prevChar = requireIndex > 0 ? line[requireIndex - 1] : "";
|
|
390
|
+
const nextIndex = requireIndex + "require".length;
|
|
391
|
+
const nextChar = nextIndex < line.length ? line[nextIndex] : "";
|
|
392
|
+
if (prevChar !== "." &&
|
|
393
|
+
!this.isIdentifierChar(prevChar) &&
|
|
394
|
+
!this.isIdentifierChar(nextChar)) {
|
|
395
|
+
return requireIndex;
|
|
396
|
+
}
|
|
397
|
+
searchIndex = nextIndex;
|
|
398
|
+
}
|
|
399
|
+
return -1;
|
|
400
|
+
}
|
|
401
|
+
findRequireCallOpenParenIndex(line, startIndex) {
|
|
402
|
+
for (let i = startIndex; i < line.length; i++) {
|
|
403
|
+
const char = line[i];
|
|
404
|
+
if (char === "(") {
|
|
405
|
+
return i;
|
|
406
|
+
}
|
|
407
|
+
if (char !== " " && char !== "\t") {
|
|
408
|
+
return -1;
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
return -1;
|
|
412
|
+
}
|
|
413
|
+
findQuoteIndex(line, startIndex) {
|
|
414
|
+
for (let i = startIndex; i < line.length; i++) {
|
|
415
|
+
if (line[i] === "'" || line[i] === '"') {
|
|
416
|
+
return i;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
return -1;
|
|
420
|
+
}
|
|
421
|
+
readQuotedValue(line, quoteIndex) {
|
|
422
|
+
const quote = line[quoteIndex];
|
|
423
|
+
const endQuoteIndex = line.indexOf(quote, quoteIndex + 1);
|
|
424
|
+
if (endQuoteIndex < 0)
|
|
425
|
+
return null;
|
|
426
|
+
return line.slice(quoteIndex + 1, endQuoteIndex);
|
|
427
|
+
}
|
|
227
428
|
extractMarkdownLinks(content) {
|
|
228
429
|
const results = [];
|
|
229
430
|
// [text](url) — internal links might be entities (exclude image syntax ![ )
|
|
@@ -263,34 +464,86 @@ export class EntityExtractor {
|
|
|
263
464
|
let m;
|
|
264
465
|
while ((m = mentionRe.exec(content)) !== null) {
|
|
265
466
|
const username = m[1].trim().toLowerCase();
|
|
266
|
-
if (
|
|
267
|
-
|
|
268
|
-
}
|
|
467
|
+
if (this.isStopWord(username))
|
|
468
|
+
continue;
|
|
469
|
+
const { entityType, sourceType } = this.classifyMention(username);
|
|
470
|
+
results.push({ name: username, entityType, sourceType });
|
|
269
471
|
}
|
|
270
472
|
return results;
|
|
271
473
|
}
|
|
474
|
+
classifyMention(name) {
|
|
475
|
+
const techType = TECH_DICTIONARY.get(name);
|
|
476
|
+
if (techType)
|
|
477
|
+
return { entityType: techType, sourceType: "mention" };
|
|
478
|
+
if (NOT_PERSON_LIST.has(name))
|
|
479
|
+
return { entityType: "concept", sourceType: "mention" };
|
|
480
|
+
return { entityType: "person", sourceType: "mention" };
|
|
481
|
+
}
|
|
272
482
|
extractOrgRepos(content) {
|
|
273
483
|
const results = [];
|
|
484
|
+
const cleanContent = this.stripUrls(content);
|
|
274
485
|
// org/repo patterns (e.g., facebook/react, microsoft/typescript)
|
|
275
486
|
const orgRepoRe = /\b([a-z][\w-]{1,30})\/([a-z][\w-]{1,30})\b/g;
|
|
276
487
|
let m;
|
|
277
|
-
while ((m = orgRepoRe.exec(
|
|
488
|
+
while ((m = orgRepoRe.exec(cleanContent)) !== null) {
|
|
278
489
|
const org = m[1].toLowerCase();
|
|
279
490
|
const repo = m[2].toLowerCase();
|
|
280
491
|
if (EntityExtractor.MIME_PREFIXES.has(org))
|
|
281
492
|
continue;
|
|
282
493
|
if (EntityExtractor.PATH_SEGMENTS.has(org))
|
|
283
494
|
continue;
|
|
495
|
+
if (EntityExtractor.PATH_SEGMENTS.has(repo))
|
|
496
|
+
continue;
|
|
284
497
|
const fullName = `${org}/${repo}`;
|
|
285
498
|
if (!this.isStopWord(org) && !this.isStopWord(repo)) {
|
|
286
499
|
results.push({ name: fullName, entityType: "project", sourceType: "code" });
|
|
287
500
|
}
|
|
288
501
|
}
|
|
502
|
+
// Also extract org/repo from GitHub URLs specifically
|
|
503
|
+
results.push(...this.extractOrgReposFromUrls(content));
|
|
504
|
+
return results;
|
|
505
|
+
}
|
|
506
|
+
/**
|
|
507
|
+
* Strip URLs from content, replacing them with whitespace to prevent
|
|
508
|
+
* URL path fragments from being matched as org/repo.
|
|
509
|
+
*/
|
|
510
|
+
stripUrls(content) {
|
|
511
|
+
// Match http(s):// URLs and protocol-relative //
|
|
512
|
+
return content.replace(/(?:https?:\/\/|\/\/)[^\s)>\]]+/g, " ");
|
|
513
|
+
}
|
|
514
|
+
/**
|
|
515
|
+
* Extract org/repo from GitHub URLs (e.g., github.com/org/repo/...).
|
|
516
|
+
* Only extracts the org/repo portion, not deeper path segments.
|
|
517
|
+
*/
|
|
518
|
+
extractOrgReposFromUrls(content) {
|
|
519
|
+
const results = [];
|
|
520
|
+
const githubUrlRe = /github\.com\/([a-z][\w-]{1,30})\/([a-z][\w-]{1,30})/gi;
|
|
521
|
+
let m;
|
|
522
|
+
while ((m = githubUrlRe.exec(content)) !== null) {
|
|
523
|
+
const org = m[1].toLowerCase();
|
|
524
|
+
const repo = m[2].toLowerCase();
|
|
525
|
+
if (EntityExtractor.MIME_PREFIXES.has(org))
|
|
526
|
+
continue;
|
|
527
|
+
if (EntityExtractor.PATH_SEGMENTS.has(org))
|
|
528
|
+
continue;
|
|
529
|
+
if (EntityExtractor.PATH_SEGMENTS.has(repo))
|
|
530
|
+
continue;
|
|
531
|
+
if (this.isStopWord(org) || this.isStopWord(repo))
|
|
532
|
+
continue;
|
|
533
|
+
results.push({ name: `${org}/${repo}`, entityType: "project", sourceType: "code" });
|
|
534
|
+
}
|
|
289
535
|
return results;
|
|
290
536
|
}
|
|
291
537
|
isStopWord(word) {
|
|
292
538
|
return STOP_LIST.has(word.toLowerCase());
|
|
293
539
|
}
|
|
540
|
+
isIdentifierChar(char) {
|
|
541
|
+
return ((char >= "a" && char <= "z") ||
|
|
542
|
+
(char >= "A" && char <= "Z") ||
|
|
543
|
+
(char >= "0" && char <= "9") ||
|
|
544
|
+
char === "_" ||
|
|
545
|
+
char === "$");
|
|
546
|
+
}
|
|
294
547
|
resolveEntityTypes(entities) {
|
|
295
548
|
const byName = new Map();
|
|
296
549
|
for (const e of entities) {
|