@knowledgine/core 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/config-loader.d.ts +15 -0
- package/dist/config/config-loader.d.ts.map +1 -0
- package/dist/config/config-loader.js +46 -0
- package/dist/config/config-loader.js.map +1 -0
- package/dist/config.d.ts +16 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +17 -0
- package/dist/config.js.map +1 -1
- package/dist/embedding/embedding-provider.d.ts +9 -0
- package/dist/embedding/embedding-provider.d.ts.map +1 -0
- package/dist/embedding/embedding-provider.js +2 -0
- package/dist/embedding/embedding-provider.js.map +1 -0
- package/dist/embedding/model-downloader.d.ts +34 -0
- package/dist/embedding/model-downloader.d.ts.map +1 -0
- package/dist/embedding/model-downloader.js +149 -0
- package/dist/embedding/model-downloader.js.map +1 -0
- package/dist/embedding/model-manager.d.ts +10 -0
- package/dist/embedding/model-manager.d.ts.map +1 -0
- package/dist/embedding/model-manager.js +32 -0
- package/dist/embedding/model-manager.js.map +1 -0
- package/dist/embedding/onnx-embedding-provider.d.ts +18 -0
- package/dist/embedding/onnx-embedding-provider.d.ts.map +1 -0
- package/dist/embedding/onnx-embedding-provider.js +121 -0
- package/dist/embedding/onnx-embedding-provider.js.map +1 -0
- package/dist/embedding/tokenizer.d.ts +19 -0
- package/dist/embedding/tokenizer.d.ts.map +1 -0
- package/dist/embedding/tokenizer.js +80 -0
- package/dist/embedding/tokenizer.js.map +1 -0
- package/dist/errors.d.ts +12 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +21 -0
- package/dist/errors.js.map +1 -1
- package/dist/feedback/feedback-learner.d.ts +29 -0
- package/dist/feedback/feedback-learner.d.ts.map +1 -0
- package/dist/feedback/feedback-learner.js +88 -0
- package/dist/feedback/feedback-learner.js.map +1 -0
- package/dist/feedback/feedback-repository.d.ts +41 -0
- package/dist/feedback/feedback-repository.d.ts.map +1 -0
- package/dist/feedback/feedback-repository.js +94 -0
- package/dist/feedback/feedback-repository.js.map +1 -0
- package/dist/feedback/index.d.ts +5 -0
- package/dist/feedback/index.d.ts.map +1 -0
- package/dist/feedback/index.js +3 -0
- package/dist/feedback/index.js.map +1 -0
- package/dist/graph/entity-extractor.d.ts +26 -0
- package/dist/graph/entity-extractor.d.ts.map +1 -0
- package/dist/graph/entity-extractor.js +237 -0
- package/dist/graph/entity-extractor.js.map +1 -0
- package/dist/graph/graph-repository.d.ts +72 -0
- package/dist/graph/graph-repository.d.ts.map +1 -0
- package/dist/graph/graph-repository.js +442 -0
- package/dist/graph/graph-repository.js.map +1 -0
- package/dist/graph/index.d.ts +4 -0
- package/dist/graph/index.d.ts.map +1 -0
- package/dist/graph/index.js +4 -0
- package/dist/graph/index.js.map +1 -0
- package/dist/graph/relation-inferrer.d.ts +23 -0
- package/dist/graph/relation-inferrer.d.ts.map +1 -0
- package/dist/graph/relation-inferrer.js +89 -0
- package/dist/graph/relation-inferrer.js.map +1 -0
- package/dist/index.d.ts +33 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +37 -4
- package/dist/index.js.map +1 -1
- package/dist/provenance/provenance-repository.d.ts +47 -0
- package/dist/provenance/provenance-repository.d.ts.map +1 -0
- package/dist/provenance/provenance-repository.js +121 -0
- package/dist/provenance/provenance-repository.js.map +1 -0
- package/dist/search/hybrid-searcher.d.ts +14 -0
- package/dist/search/hybrid-searcher.d.ts.map +1 -0
- package/dist/search/hybrid-searcher.js +67 -0
- package/dist/search/hybrid-searcher.js.map +1 -0
- package/dist/search/knowledge-searcher.d.ts +8 -4
- package/dist/search/knowledge-searcher.d.ts.map +1 -1
- package/dist/search/knowledge-searcher.js +47 -16
- package/dist/search/knowledge-searcher.js.map +1 -1
- package/dist/search/link-generator.d.ts +7 -1
- package/dist/search/link-generator.d.ts.map +1 -1
- package/dist/search/link-generator.js +37 -1
- package/dist/search/link-generator.js.map +1 -1
- package/dist/search/semantic-searcher.d.ts +14 -0
- package/dist/search/semantic-searcher.d.ts.map +1 -0
- package/dist/search/semantic-searcher.js +30 -0
- package/dist/search/semantic-searcher.js.map +1 -0
- package/dist/services/knowledge-service.d.ts +128 -0
- package/dist/services/knowledge-service.d.ts.map +1 -0
- package/dist/services/knowledge-service.js +168 -0
- package/dist/services/knowledge-service.js.map +1 -0
- package/dist/storage/database.d.ts +9 -1
- package/dist/storage/database.d.ts.map +1 -1
- package/dist/storage/database.js +28 -1
- package/dist/storage/database.js.map +1 -1
- package/dist/storage/knowledge-repository.d.ts +27 -0
- package/dist/storage/knowledge-repository.d.ts.map +1 -1
- package/dist/storage/knowledge-repository.js +107 -3
- package/dist/storage/knowledge-repository.js.map +1 -1
- package/dist/storage/migrations/003_vector_embeddings.d.ts +3 -0
- package/dist/storage/migrations/003_vector_embeddings.d.ts.map +1 -0
- package/dist/storage/migrations/003_vector_embeddings.js +50 -0
- package/dist/storage/migrations/003_vector_embeddings.js.map +1 -0
- package/dist/storage/migrations/004_knowledge_graph.d.ts +3 -0
- package/dist/storage/migrations/004_knowledge_graph.d.ts.map +1 -0
- package/dist/storage/migrations/004_knowledge_graph.js +106 -0
- package/dist/storage/migrations/004_knowledge_graph.js.map +1 -0
- package/dist/storage/migrations/005a_events_layer.d.ts +3 -0
- package/dist/storage/migrations/005a_events_layer.d.ts.map +1 -0
- package/dist/storage/migrations/005a_events_layer.js +55 -0
- package/dist/storage/migrations/005a_events_layer.js.map +1 -0
- package/dist/storage/migrations/005b_bitemporal.d.ts +3 -0
- package/dist/storage/migrations/005b_bitemporal.d.ts.map +1 -0
- package/dist/storage/migrations/005b_bitemporal.js +53 -0
- package/dist/storage/migrations/005b_bitemporal.js.map +1 -0
- package/dist/storage/migrations/005c_provenance.d.ts +3 -0
- package/dist/storage/migrations/005c_provenance.d.ts.map +1 -0
- package/dist/storage/migrations/005c_provenance.js +56 -0
- package/dist/storage/migrations/005c_provenance.js.map +1 -0
- package/dist/storage/migrations/006_extraction_feedback.d.ts +3 -0
- package/dist/storage/migrations/006_extraction_feedback.d.ts.map +1 -0
- package/dist/storage/migrations/006_extraction_feedback.js +30 -0
- package/dist/storage/migrations/006_extraction_feedback.js.map +1 -0
- package/dist/types.d.ts +40 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +15 -4
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { KnowledgineConfig } from "../config.js";
|
|
2
|
+
export interface RcConfig {
|
|
3
|
+
semantic?: boolean;
|
|
4
|
+
[key: string]: unknown;
|
|
5
|
+
}
|
|
6
|
+
/**
|
|
7
|
+
* Load knowledgine configuration from RC file and environment variables.
|
|
8
|
+
* Priority: env var > RC file > defaults (embedding.enabled = false)
|
|
9
|
+
*/
|
|
10
|
+
export declare function loadConfig(rootPath: string): KnowledgineConfig;
|
|
11
|
+
/**
|
|
12
|
+
* Write a .knowledginerc.json config file to the project root.
|
|
13
|
+
*/
|
|
14
|
+
export declare function writeRcConfig(rootPath: string, config: RcConfig): void;
|
|
15
|
+
//# sourceMappingURL=config-loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config-loader.d.ts","sourceRoot":"","sources":["../../src/config/config-loader.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAEtD,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;;GAGG;AACH,wBAAgB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,iBAAiB,CAgB9D;AAqBD;;GAEG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,GAAG,IAAI,CAGtE"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { readFileSync, existsSync, writeFileSync } from "fs";
|
|
2
|
+
import { resolve } from "path";
|
|
3
|
+
import { parse as parseYaml } from "yaml";
|
|
4
|
+
import { defineConfig } from "../config.js";
|
|
5
|
+
/**
|
|
6
|
+
* Load knowledgine configuration from RC file and environment variables.
|
|
7
|
+
* Priority: env var > RC file > defaults (embedding.enabled = false)
|
|
8
|
+
*/
|
|
9
|
+
export function loadConfig(rootPath) {
|
|
10
|
+
const rcConfig = loadRcFile(rootPath);
|
|
11
|
+
// Environment variable override
|
|
12
|
+
const envSemantic = process.env["KNOWLEDGINE_SEMANTIC"];
|
|
13
|
+
const semanticEnabled = envSemantic === "true" || envSemantic === "1" || rcConfig?.semantic === true;
|
|
14
|
+
return defineConfig({
|
|
15
|
+
rootPath,
|
|
16
|
+
embedding: {
|
|
17
|
+
modelName: "all-MiniLM-L6-v2",
|
|
18
|
+
dimensions: 384,
|
|
19
|
+
enabled: semanticEnabled,
|
|
20
|
+
},
|
|
21
|
+
});
|
|
22
|
+
}
|
|
23
|
+
function loadRcFile(rootPath) {
|
|
24
|
+
const jsonPath = resolve(rootPath, ".knowledginerc.json");
|
|
25
|
+
const ymlPath = resolve(rootPath, ".knowledginerc.yml");
|
|
26
|
+
try {
|
|
27
|
+
if (existsSync(jsonPath)) {
|
|
28
|
+
return JSON.parse(readFileSync(jsonPath, "utf-8"));
|
|
29
|
+
}
|
|
30
|
+
if (existsSync(ymlPath)) {
|
|
31
|
+
return parseYaml(readFileSync(ymlPath, "utf-8"));
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
catch (error) {
|
|
35
|
+
console.error(`Warning: Failed to parse config file: ${error instanceof Error ? error.message : String(error)}`);
|
|
36
|
+
}
|
|
37
|
+
return null;
|
|
38
|
+
}
|
|
39
|
+
/**
 * Persist the given settings as `.knowledginerc.json` in the project root.
 *
 * The file is written as 2-space indented JSON followed by a newline.
 *
 * @param rootPath Project root directory.
 * @param config Settings to serialise.
 */
export function writeRcConfig(rootPath, config) {
    const target = resolve(rootPath, ".knowledginerc.json");
    const serialized = `${JSON.stringify(config, null, 2)}\n`;
    writeFileSync(target, serialized);
}
|
|
46
|
+
//# sourceMappingURL=config-loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config-loader.js","sourceRoot":"","sources":["../../src/config/config-loader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAC7D,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAC/B,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAC1C,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAQ5C;;;GAGG;AACH,MAAM,UAAU,UAAU,CAAC,QAAgB;IACzC,MAAM,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IAEtC,gCAAgC;IAChC,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IACxD,MAAM,eAAe,GACnB,WAAW,KAAK,MAAM,IAAI,WAAW,KAAK,GAAG,IAAI,QAAQ,EAAE,QAAQ,KAAK,IAAI,CAAC;IAE/E,OAAO,YAAY,CAAC;QAClB,QAAQ;QACR,SAAS,EAAE;YACT,SAAS,EAAE,kBAAkB;YAC7B,UAAU,EAAE,GAAG;YACf,OAAO,EAAE,eAAe;SACzB;KACF,CAAC,CAAC;AACL,CAAC;AAED,SAAS,UAAU,CAAC,QAAgB;IAClC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,EAAE,qBAAqB,CAAC,CAAC;IAC1D,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,EAAE,oBAAoB,CAAC,CAAC;IAExD,IAAI,CAAC;QACH,IAAI,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAa,CAAC;QACjE,CAAC;QACD,IAAI,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;YACxB,OAAO,SAAS,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAa,CAAC;QAC/D,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,yCAAyC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAClG,CAAC;IACJ,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,QAAgB,EAAE,MAAgB;IAC9D,MAAM,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,qBAAqB,CAAC,CAAC;IACxD,aAAa,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;AAChE,CAAC"}
|
package/dist/config.d.ts
CHANGED
|
@@ -1,4 +1,18 @@
|
|
|
1
1
|
export type PatternCategory = "problem" | "solution" | "learning" | "time";
|
|
2
|
+
export interface EmbeddingConfig {
|
|
3
|
+
/** Name of the model to use */
|
|
4
|
+
modelName: string;
|
|
5
|
+
/** Number of embedding dimensions */
|
|
6
|
+
dimensions: number;
|
|
7
|
+
/** Whether embedding generation is enabled */
|
|
8
|
+
enabled: boolean;
|
|
9
|
+
}
|
|
10
|
+
export interface SearchConfig {
|
|
11
|
+
/** Default search mode */
|
|
12
|
+
defaultMode: "keyword" | "semantic" | "hybrid";
|
|
13
|
+
/** Weight of the FTS score in hybrid search (0-1) */
|
|
14
|
+
hybridAlpha: number;
|
|
15
|
+
}
|
|
2
16
|
export interface KnowledgineConfig {
|
|
3
17
|
rootPath: string;
|
|
4
18
|
dbPath: string;
|
|
@@ -8,6 +22,8 @@ export interface KnowledgineConfig {
|
|
|
8
22
|
frontmatter: {
|
|
9
23
|
requiredFields: string[];
|
|
10
24
|
};
|
|
25
|
+
embedding: EmbeddingConfig;
|
|
26
|
+
search: SearchConfig;
|
|
11
27
|
}
|
|
12
28
|
export declare function defineConfig(partial?: Partial<KnowledgineConfig>): KnowledgineConfig;
|
|
13
29
|
//# sourceMappingURL=config.d.ts.map
|
package/dist/config.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,eAAe,GAAG,SAAS,GAAG,UAAU,GAAG,UAAU,GAAG,MAAM,CAAC;AAE3E,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE;QACR,OAAO,EAAE,eAAe,EAAE,CAAC;KAC5B,CAAC;IACF,WAAW,EAAE;QACX,cAAc,EAAE,MAAM,EAAE,CAAC;KAC1B,CAAC;
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,eAAe,GAAG,SAAS,GAAG,UAAU,GAAG,UAAU,GAAG,MAAM,CAAC;AAE3E,MAAM,WAAW,eAAe;IAC9B,eAAe;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,iBAAiB;IACjB,WAAW,EAAE,SAAS,GAAG,UAAU,GAAG,QAAQ,CAAC;IAC/C,8BAA8B;IAC9B,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE;QACR,OAAO,EAAE,eAAe,EAAE,CAAC;KAC5B,CAAC;IACF,WAAW,EAAE;QACX,cAAc,EAAE,MAAM,EAAE,CAAC;KAC1B,CAAC;IACF,SAAS,EAAE,eAAe,CAAC;IAC3B,MAAM,EAAE,YAAY,CAAC;CACtB;AAsBD,wBAAgB,YAAY,CAAC,OAAO,GAAE,OAAO,CAAC,iBAAiB,CAAM,GAAG,iBAAiB,CA0BxF"}
|
package/dist/config.js
CHANGED
|
@@ -8,6 +8,15 @@ const DEFAULT_CONFIG = {
|
|
|
8
8
|
frontmatter: {
|
|
9
9
|
requiredFields: [],
|
|
10
10
|
},
|
|
11
|
+
embedding: {
|
|
12
|
+
modelName: "all-MiniLM-L6-v2",
|
|
13
|
+
dimensions: 384,
|
|
14
|
+
enabled: false,
|
|
15
|
+
},
|
|
16
|
+
search: {
|
|
17
|
+
defaultMode: "keyword",
|
|
18
|
+
hybridAlpha: 0.3,
|
|
19
|
+
},
|
|
11
20
|
};
|
|
12
21
|
export function defineConfig(partial = {}) {
|
|
13
22
|
const rootPath = partial.rootPath ?? DEFAULT_CONFIG.rootPath;
|
|
@@ -25,6 +34,14 @@ export function defineConfig(partial = {}) {
|
|
|
25
34
|
...DEFAULT_CONFIG.frontmatter,
|
|
26
35
|
...partial.frontmatter,
|
|
27
36
|
},
|
|
37
|
+
embedding: {
|
|
38
|
+
...DEFAULT_CONFIG.embedding,
|
|
39
|
+
...partial.embedding,
|
|
40
|
+
},
|
|
41
|
+
search: {
|
|
42
|
+
...DEFAULT_CONFIG.search,
|
|
43
|
+
...partial.search,
|
|
44
|
+
},
|
|
28
45
|
};
|
|
29
46
|
}
|
|
30
47
|
//# sourceMappingURL=config.js.map
|
package/dist/config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAiC/B,MAAM,cAAc,GAAsB;IACxC,QAAQ,EAAE,GAAG;IACb,MAAM,EAAE,EAAE;IACV,QAAQ,EAAE;QACR,OAAO,EAAE,CAAC,SAAS,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,CAAC;KACrD;IACD,WAAW,EAAE;QACX,cAAc,EAAE,EAAE;KACnB;IACD,SAAS,EAAE;QACT,SAAS,EAAE,kBAAkB;QAC7B,UAAU,EAAE,GAAG;QACf,OAAO,EAAE,KAAK;KACf;IACD,MAAM,EAAE;QACN,WAAW,EAAE,SAAS;QACtB,WAAW,EAAE,GAAG;KACjB;CACF,CAAC;AAEF,MAAM,UAAU,YAAY,CAAC,UAAsC,EAAE;IACnE,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,cAAc,CAAC,QAAQ,CAAC;IAC7D,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,QAAQ,EAAE,cAAc,EAAE,cAAc,CAAC,CAAC;IAEnF,OAAO;QACL,GAAG,cAAc;QACjB,GAAG,OAAO;QACV,QAAQ;QACR,MAAM;QACN,QAAQ,EAAE;YACR,GAAG,cAAc,CAAC,QAAQ;YAC1B,GAAG,OAAO,CAAC,QAAQ;SACpB;QACD,WAAW,EAAE;YACX,GAAG,cAAc,CAAC,WAAW;YAC7B,GAAG,OAAO,CAAC,WAAW;SACvB;QACD,SAAS,EAAE;YACT,GAAG,cAAc,CAAC,SAAS;YAC3B,GAAG,OAAO,CAAC,SAAS;SACrB;QACD,MAAM,EAAE;YACN,GAAG,cAAc,CAAC,MAAM;YACxB,GAAG,OAAO,CAAC,MAAM;SAClB;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export interface EmbeddingProvider {
|
|
2
|
+
/** Convert a text into an embedding vector */
|
|
3
|
+
embed(text: string): Promise<Float32Array>;
|
|
4
|
+
/** Process multiple texts as a batch */
|
|
5
|
+
embedBatch(texts: string[]): Promise<Float32Array[]>;
|
|
6
|
+
/** Return the number of embedding dimensions */
|
|
7
|
+
getDimensions(): number;
|
|
8
|
+
}
|
|
9
|
+
//# sourceMappingURL=embedding-provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding-provider.d.ts","sourceRoot":"","sources":["../../src/embedding/embedding-provider.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,iBAAiB;IAChC,yBAAyB;IACzB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAC3C,qBAAqB;IACrB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IACrD,kBAAkB;IAClB,aAAa,IAAI,MAAM,CAAC;CACzB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding-provider.js","sourceRoot":"","sources":["../../src/embedding/embedding-provider.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding model downloader.
|
|
3
|
+
* Downloads all-MiniLM-L6-v2 ONNX model files from HuggingFace.
|
|
4
|
+
*
|
|
5
|
+
* Features:
|
|
6
|
+
* - Atomic download (.tmp + rename)
|
|
7
|
+
* - Redirect limit (max 5)
|
|
8
|
+
* - Timeout (5 minutes)
|
|
9
|
+
* - Progress callback
|
|
10
|
+
* - Skip existing files (0-byte treated as corrupt)
|
|
11
|
+
* - SIGINT cleanup
|
|
12
|
+
*/
|
|
13
|
+
import type { ModelManager } from "./model-manager.js";
|
|
14
|
+
export interface DownloadProgress {
|
|
15
|
+
file: string;
|
|
16
|
+
downloaded: number;
|
|
17
|
+
total: number | null;
|
|
18
|
+
}
|
|
19
|
+
export interface DownloadOptions {
|
|
20
|
+
onProgress?: (progress: DownloadProgress) => void;
|
|
21
|
+
onFileComplete?: (file: string) => void;
|
|
22
|
+
timeoutMs?: number;
|
|
23
|
+
maxRedirects?: number;
|
|
24
|
+
}
|
|
25
|
+
export interface ModelFile {
|
|
26
|
+
url: string;
|
|
27
|
+
dest: string;
|
|
28
|
+
}
|
|
29
|
+
export declare const MODEL_FILES: ModelFile[];
|
|
30
|
+
export declare function downloadModel(modelManager: ModelManager, options?: DownloadOptions): Promise<{
|
|
31
|
+
downloaded: string[];
|
|
32
|
+
skipped: string[];
|
|
33
|
+
}>;
|
|
34
|
+
//# sourceMappingURL=model-downloader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-downloader.d.ts","sourceRoot":"","sources":["../../src/embedding/model-downloader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAMH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAEvD,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAClD,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,SAAS;IACxB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;CACd;AAED,eAAO,MAAM,WAAW,EAAE,SAAS,EAalC,CAAC;AAyHF,wBAAsB,aAAa,CACjC,YAAY,EAAE,YAAY,EAC1B,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC;IAAE,UAAU,EAAE,MAAM,EAAE,CAAC;IAAC,OAAO,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,CAqBtD"}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding model downloader.
|
|
3
|
+
* Downloads all-MiniLM-L6-v2 ONNX model files from HuggingFace.
|
|
4
|
+
*
|
|
5
|
+
* Features:
|
|
6
|
+
* - Atomic download (.tmp + rename)
|
|
7
|
+
* - Redirect limit (max 5)
|
|
8
|
+
* - Timeout (5 minutes)
|
|
9
|
+
* - Progress callback
|
|
10
|
+
* - Skip existing files (0-byte treated as corrupt)
|
|
11
|
+
* - SIGINT cleanup
|
|
12
|
+
*/
|
|
13
|
+
import { mkdirSync, renameSync, unlinkSync, existsSync, statSync, createWriteStream } from "fs";
|
|
14
|
+
import { get as httpsGet } from "https";
|
|
15
|
+
import { get as httpGet } from "http";
|
|
16
|
+
import { pipeline } from "stream/promises";
|
|
17
|
+
/**
 * Files that make up the all-MiniLM-L6-v2 model.
 *
 * `url` is the download location; `dest` is the file name used inside the
 * local model directory (the quantized ONNX model is stored as `model.onnx`).
 */
export const MODEL_FILES = [
    { url: "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json", dest: "tokenizer.json" },
    { url: "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/config.json", dest: "config.json" },
    { url: "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model_quantized.onnx", dest: "model.onnx" },
];
|
|
31
|
+
/**
 * Tell whether a previously downloaded file can be reused.
 *
 * @param filePath Path to check.
 * @returns true when the path exists and its size is greater than zero bytes;
 *   a 0-byte file is treated as corrupt.
 */
function isExistingAndValid(filePath) {
    return existsSync(filePath) && statSync(filePath).size > 0;
}
|
|
37
|
+
function downloadFile(url, destPath, options, fileName, redirectCount = 0) {
|
|
38
|
+
const maxRedirects = options.maxRedirects ?? 5;
|
|
39
|
+
const timeoutMs = options.timeoutMs ?? 300_000; // 5 minutes
|
|
40
|
+
if (redirectCount > maxRedirects) {
|
|
41
|
+
return Promise.reject(new Error(`Too many redirects (${maxRedirects}) for ${url}`));
|
|
42
|
+
}
|
|
43
|
+
return new Promise((resolve, reject) => {
|
|
44
|
+
const tmpPath = destPath + ".tmp";
|
|
45
|
+
let aborted = false;
|
|
46
|
+
const cleanup = () => {
|
|
47
|
+
try {
|
|
48
|
+
if (existsSync(tmpPath))
|
|
49
|
+
unlinkSync(tmpPath);
|
|
50
|
+
}
|
|
51
|
+
catch {
|
|
52
|
+
// ignore cleanup errors
|
|
53
|
+
}
|
|
54
|
+
};
|
|
55
|
+
const onSigint = () => {
|
|
56
|
+
aborted = true;
|
|
57
|
+
cleanup();
|
|
58
|
+
};
|
|
59
|
+
process.on("SIGINT", onSigint);
|
|
60
|
+
const getFunc = url.startsWith("http://") ? httpGet : httpsGet;
|
|
61
|
+
const timer = setTimeout(() => {
|
|
62
|
+
aborted = true;
|
|
63
|
+
req.destroy();
|
|
64
|
+
cleanup();
|
|
65
|
+
reject(new Error(`Download timeout (${timeoutMs}ms) for ${fileName}`));
|
|
66
|
+
}, timeoutMs);
|
|
67
|
+
const req = getFunc(url, (response) => {
|
|
68
|
+
if (aborted)
|
|
69
|
+
return;
|
|
70
|
+
// Handle redirects (301, 302, 307, 308)
|
|
71
|
+
if (response.statusCode === 301 || response.statusCode === 302 || response.statusCode === 307 || response.statusCode === 308) {
|
|
72
|
+
clearTimeout(timer);
|
|
73
|
+
process.removeListener("SIGINT", onSigint);
|
|
74
|
+
const location = response.headers.location;
|
|
75
|
+
if (!location) {
|
|
76
|
+
reject(new Error(`Redirect without location header for ${url}`));
|
|
77
|
+
return;
|
|
78
|
+
}
|
|
79
|
+
downloadFile(location, destPath, options, fileName, redirectCount + 1)
|
|
80
|
+
.then(resolve)
|
|
81
|
+
.catch(reject);
|
|
82
|
+
return;
|
|
83
|
+
}
|
|
84
|
+
if (response.statusCode !== 200) {
|
|
85
|
+
clearTimeout(timer);
|
|
86
|
+
process.removeListener("SIGINT", onSigint);
|
|
87
|
+
reject(new Error(`HTTP ${response.statusCode} for ${fileName}`));
|
|
88
|
+
return;
|
|
89
|
+
}
|
|
90
|
+
const totalSize = response.headers["content-length"]
|
|
91
|
+
? parseInt(response.headers["content-length"], 10)
|
|
92
|
+
: null;
|
|
93
|
+
let downloaded = 0;
|
|
94
|
+
response.on("data", (chunk) => {
|
|
95
|
+
downloaded += chunk.length;
|
|
96
|
+
options.onProgress?.({
|
|
97
|
+
file: fileName,
|
|
98
|
+
downloaded,
|
|
99
|
+
total: totalSize,
|
|
100
|
+
});
|
|
101
|
+
});
|
|
102
|
+
const fileStream = createWriteStream(tmpPath);
|
|
103
|
+
pipeline(response, fileStream)
|
|
104
|
+
.then(() => {
|
|
105
|
+
clearTimeout(timer);
|
|
106
|
+
process.removeListener("SIGINT", onSigint);
|
|
107
|
+
if (aborted) {
|
|
108
|
+
cleanup();
|
|
109
|
+
reject(new Error("Download aborted"));
|
|
110
|
+
return;
|
|
111
|
+
}
|
|
112
|
+
// Atomic rename
|
|
113
|
+
renameSync(tmpPath, destPath);
|
|
114
|
+
options.onFileComplete?.(fileName);
|
|
115
|
+
resolve();
|
|
116
|
+
})
|
|
117
|
+
.catch((err) => {
|
|
118
|
+
clearTimeout(timer);
|
|
119
|
+
process.removeListener("SIGINT", onSigint);
|
|
120
|
+
cleanup();
|
|
121
|
+
reject(err);
|
|
122
|
+
});
|
|
123
|
+
});
|
|
124
|
+
req.on("error", (err) => {
|
|
125
|
+
clearTimeout(timer);
|
|
126
|
+
process.removeListener("SIGINT", onSigint);
|
|
127
|
+
cleanup();
|
|
128
|
+
reject(err);
|
|
129
|
+
});
|
|
130
|
+
});
|
|
131
|
+
}
|
|
132
|
+
export async function downloadModel(modelManager, options = {}) {
|
|
133
|
+
const modelDir = modelManager.getModelDir();
|
|
134
|
+
mkdirSync(modelDir, { recursive: true });
|
|
135
|
+
const downloaded = [];
|
|
136
|
+
const skipped = [];
|
|
137
|
+
for (const file of MODEL_FILES) {
|
|
138
|
+
const destPath = `${modelDir}/${file.dest}`;
|
|
139
|
+
if (isExistingAndValid(destPath)) {
|
|
140
|
+
skipped.push(file.dest);
|
|
141
|
+
options.onFileComplete?.(file.dest);
|
|
142
|
+
continue;
|
|
143
|
+
}
|
|
144
|
+
await downloadFile(file.url, destPath, options, file.dest);
|
|
145
|
+
downloaded.push(file.dest);
|
|
146
|
+
}
|
|
147
|
+
return { downloaded, skipped };
|
|
148
|
+
}
|
|
149
|
+
//# sourceMappingURL=model-downloader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-downloader.js","sourceRoot":"","sources":["../../src/embedding/model-downloader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,iBAAiB,EAAE,MAAM,IAAI,CAAC;AAChG,OAAO,EAAE,GAAG,IAAI,QAAQ,EAAE,MAAM,OAAO,CAAC;AACxC,OAAO,EAAE,GAAG,IAAI,OAAO,EAAE,MAAM,MAAM,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAqB3C,MAAM,CAAC,MAAM,WAAW,GAAgB;IACtC;QACE,GAAG,EAAE,2FAA2F;QAChG,IAAI,EAAE,gBAAgB;KACvB;IACD;QACE,GAAG,EAAE,wFAAwF;QAC7F,IAAI,EAAE,aAAa;KACpB;IACD;QACE,GAAG,EAAE,sGAAsG;QAC3G,IAAI,EAAE,YAAY;KACnB;CACF,CAAC;AAEF,SAAS,kBAAkB,CAAC,QAAgB;IAC1C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAChC,OAAO,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;AACvB,CAAC;AAED,SAAS,YAAY,CACnB,GAAW,EACX,QAAgB,EAChB,OAAwB,EACxB,QAAgB,EAChB,gBAAwB,CAAC;IAEzB,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC;IAC/C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,CAAC,YAAY;IAE5D,IAAI,aAAa,GAAG,YAAY,EAAE,CAAC;QACjC,OAAO,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,uBAAuB,YAAY,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC;IACtF,CAAC;IAED,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAC3C,MAAM,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;QAClC,IAAI,OAAO,GAAG,KAAK,CAAC;QAEpB,MAAM,OAAO,GAAG,GAAS,EAAE;YACzB,IAAI,CAAC;gBACH,IAAI,UAAU,CAAC,OAAO,CAAC;oBAAE,UAAU,CAAC,OAAO,CAAC,CAAC;YAC/C,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,QAAQ,GAAG,GAAS,EAAE;YAC1B,OAAO,GAAG,IAAI,CAAC;YACf,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC;QACF,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;QAE/B,MAAM,OAAO,GAAG,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE/D,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;YAC5B,OAAO,GAAG,IAAI,CAAC;YACf,GAAG,CAAC,OAAO,EAAE,CAAC;YACd,OAAO,EAAE,CAAC;YACV,MAAM,CAAC,IAAI,KAAK,CAAC,qBAAqB,SAAS,WAAW,QAAQ,EAAE,CAAC,CAAC,CAAC;QACzE,CAAC,EAAE,SAAS,CAAC,CAAC;QAEd,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,EAAE;YACpC,IAAI,OAAO;gBAAE,OAAO;YAEpB,wCAAwC;YACxC,IAAI,QAAQ,CAAC,UAAU,KAAK,GAA
G,IAAI,QAAQ,CAAC,UAAU,KAAK,GAAG,IAAI,QAAQ,CAAC,UAAU,KAAK,GAAG,IAAI,QAAQ,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;gBAC7H,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBAC3C,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC;gBAC3C,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACd,MAAM,CAAC,IAAI,KAAK,CAAC,wCAAwC,GAAG,EAAE,CAAC,CAAC,CAAC;oBACjE,OAAO;gBACT,CAAC;gBACD,YAAY,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,GAAG,CAAC,CAAC;qBACnE,IAAI,CAAC,OAAO,CAAC;qBACb,KAAK,CAAC,MAAM,CAAC,CAAC;gBACjB,OAAO;YACT,CAAC;YAED,IAAI,QAAQ,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;gBAChC,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBAC3C,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,UAAU,QAAQ,QAAQ,EAAE,CAAC,CAAC,CAAC;gBACjE,OAAO;YACT,CAAC;YAED,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,gBAAgB,CAAC;gBAClD,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,gBAAgB,CAAC,EAAE,EAAE,CAAC;gBAClD,CAAC,CAAC,IAAI,CAAC;YAET,IAAI,UAAU,GAAG,CAAC,CAAC;YACnB,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAa,EAAE,EAAE;gBACpC,UAAU,IAAI,KAAK,CAAC,MAAM,CAAC;gBAC3B,OAAO,CAAC,UAAU,EAAE,CAAC;oBACnB,IAAI,EAAE,QAAQ;oBACd,UAAU;oBACV,KAAK,EAAE,SAAS;iBACjB,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;YAEH,MAAM,UAAU,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;YAE9C,QAAQ,CAAC,QAAQ,EAAE,UAAU,CAAC;iBAC3B,IAAI,CAAC,GAAG,EAAE;gBACT,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBAC3C,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO,EAAE,CAAC;oBACV,MAAM,CAAC,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC,CAAC;oBACtC,OAAO;gBACT,CAAC;gBACD,gBAAgB;gBAChB,UAAU,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;gBAC9B,OAAO,CAAC,cAAc,EAAE,CAAC,QAAQ,CAAC,CAAC;gBACnC,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC;iBACD,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;gBACb,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBAC3C,OAAO,EAAE,CAAC;gBACV,MAAM,CAAC,GAAG,CAAC,CAAC;YACd,CAAC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;QAEH,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACtB,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAC3C,OAAO,EAAE,CAAC;YACV,MAAM,CAAC,GAAG,CAAC,CAAC;
QACd,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,YAA0B,EAC1B,UAA2B,EAAE;IAE7B,MAAM,QAAQ,GAAG,YAAY,CAAC,WAAW,EAAE,CAAC;IAC5C,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAEzC,MAAM,UAAU,GAAa,EAAE,CAAC;IAChC,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,GAAG,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAE5C,IAAI,kBAAkB,CAAC,QAAQ,CAAC,EAAE,CAAC;YACjC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,OAAO,CAAC,cAAc,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACpC,SAAS;QACX,CAAC;QAED,MAAM,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3D,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC;AACjC,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export declare const DEFAULT_MODEL_NAME = "all-MiniLM-L6-v2";
|
|
2
|
+
export declare class ModelManager {
|
|
3
|
+
private modelsDir;
|
|
4
|
+
constructor(modelsDir?: string);
|
|
5
|
+
getModelDir(modelName?: string): string;
|
|
6
|
+
getModelPath(modelName?: string): string;
|
|
7
|
+
getTokenizerPath(modelName?: string): string;
|
|
8
|
+
isModelAvailable(modelName?: string): boolean;
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=model-manager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-manager.d.ts","sourceRoot":"","sources":["../../src/embedding/model-manager.ts"],"names":[],"mappings":"AAIA,eAAO,MAAM,kBAAkB,qBAAqB,CAAC;AAErD,qBAAa,YAAY;IACvB,OAAO,CAAC,SAAS,CAAS;gBAEd,SAAS,CAAC,EAAE,MAAM;IAY9B,WAAW,CAAC,SAAS,GAAE,MAA2B,GAAG,MAAM;IAI3D,YAAY,CAAC,SAAS,GAAE,MAA2B,GAAG,MAAM;IAI5D,gBAAgB,CAAC,SAAS,GAAE,MAA2B,GAAG,MAAM;IAIhE,gBAAgB,CAAC,SAAS,GAAE,MAA2B,GAAG,OAAO;CAGlE"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { existsSync } from "fs";
|
|
2
|
+
import { join, dirname } from "path";
|
|
3
|
+
import { fileURLToPath } from "url";
|
|
4
|
+
/** Model used when a method is called without an explicit model name. */
export const DEFAULT_MODEL_NAME = "all-MiniLM-L6-v2";
/**
 * Computes the on-disk locations of embedding model files.
 *
 * Each model lives in `<modelsDir>/<modelName>/` and consists of
 * `model.onnx` and `tokenizer.json`.
 */
export class ModelManager {
    modelsDir;
    /**
     * @param modelsDir Directory holding one sub-directory per model. When
     *   omitted (or empty), `models/` two levels above this module's directory
     *   is used, i.e. dist/embedding/ or src/embedding/ -> package root.
     */
    constructor(modelsDir) {
        this.modelsDir = modelsDir || join(dirname(fileURLToPath(import.meta.url)), "..", "..", "models");
    }
    /** Directory of the given model. */
    getModelDir(modelName = DEFAULT_MODEL_NAME) {
        return join(this.modelsDir, modelName);
    }
    /** Path of the model's `model.onnx` file. */
    getModelPath(modelName = DEFAULT_MODEL_NAME) {
        return join(this.getModelDir(modelName), "model.onnx");
    }
    /** Path of the model's `tokenizer.json` file. */
    getTokenizerPath(modelName = DEFAULT_MODEL_NAME) {
        return join(this.getModelDir(modelName), "tokenizer.json");
    }
    /** True when both the model file and the tokenizer file exist on disk. */
    isModelAvailable(modelName = DEFAULT_MODEL_NAME) {
        const requiredFiles = [this.getModelPath(modelName), this.getTokenizerPath(modelName)];
        return requiredFiles.every((filePath) => existsSync(filePath));
    }
}
|
|
32
|
+
//# sourceMappingURL=model-manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-manager.js","sourceRoot":"","sources":["../../src/embedding/model-manager.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAChC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AAEpC,MAAM,CAAC,MAAM,kBAAkB,GAAG,kBAAkB,CAAC;AAErD,MAAM,OAAO,YAAY;IACf,SAAS,CAAS;IAE1B,YAAY,SAAkB;QAC5B,IAAI,SAAS,EAAE,CAAC;YACd,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC7B,CAAC;aAAM,CAAC;YACN,gFAAgF;YAChF,MAAM,UAAU,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,2CAA2C;YAC3C,yCAAyC;YACzC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,UAAU,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,WAAW,CAAC,YAAoB,kBAAkB;QAChD,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;IACzC,CAAC;IAED,YAAY,CAAC,YAAoB,kBAAkB;QACjD,OAAO,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,EAAE,YAAY,CAAC,CAAC;IACzD,CAAC;IAED,gBAAgB,CAAC,YAAoB,kBAAkB;QACrD,OAAO,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,EAAE,gBAAgB,CAAC,CAAC;IAC7D,CAAC;IAED,gBAAgB,CAAC,YAAoB,kBAAkB;QACrD,OAAO,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC,IAAI,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC,CAAC;IAClG,CAAC;CACF"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { EmbeddingProvider } from "./embedding-provider.js";
|
|
2
|
+
import { ModelManager } from "./model-manager.js";
|
|
3
|
+
/**
 * Type declarations for the ONNX-backed implementation of EmbeddingProvider.
 *
 * Both constructor arguments are optional; the implementation supplies a
 * default model name and creates its own ModelManager when none is given.
 */
export declare class OnnxEmbeddingProvider implements EmbeddingProvider {
|
|
4
|
+
private session;
|
|
5
|
+
private tokenizer;
|
|
6
|
+
private modelName;
|
|
7
|
+
private modelManager;
|
|
8
|
+
/**
 * @param modelName Name of the model to load.
 * @param modelManager Resolver used to locate the model and tokenizer files.
 */
constructor(modelName?: string, modelManager?: ModelManager);
|
|
9
|
+
private getSession;
|
|
10
|
+
private getTokenizer;
|
|
11
|
+
/** Embeds a single text and resolves to its vector. */
embed(text: string): Promise<Float32Array>;
|
|
12
|
+
/** Embeds every text and resolves to one vector per input, in input order. */
embedBatch(texts: string[]): Promise<Float32Array[]>;
|
|
13
|
+
/** Length of the vectors produced by this provider. */
getDimensions(): number;
|
|
14
|
+
/** Releases the underlying inference session if one has been created. */
close(): Promise<void>;
|
|
15
|
+
private meanPool;
|
|
16
|
+
private normalize;
|
|
17
|
+
}
|
|
18
|
+
//# sourceMappingURL=onnx-embedding-provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"onnx-embedding-provider.d.ts","sourceRoot":"","sources":["../../src/embedding/onnx-embedding-provider.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAEjE,OAAO,EAAE,YAAY,EAAsB,MAAM,oBAAoB,CAAC;AAKtE,qBAAa,qBAAsB,YAAW,iBAAiB;IAC7D,OAAO,CAAC,OAAO,CAAiC;IAChD,OAAO,CAAC,SAAS,CAAmC;IACpD,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAe;gBAEvB,SAAS,GAAE,MAA2B,EAAE,YAAY,CAAC,EAAE,YAAY;YAKjE,UAAU;IAwBxB,OAAO,CAAC,YAAY;IAWd,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAK1C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAsD1D,aAAa,IAAI,MAAM;IAIjB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAO5B,OAAO,CAAC,QAAQ;IA0BhB,OAAO,CAAC,SAAS;CAalB"}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { WordPieceTokenizer } from "./tokenizer.js";
|
|
2
|
+
import { ModelManager, DEFAULT_MODEL_NAME } from "./model-manager.js";
|
|
3
|
+
import { EmbeddingNotAvailableError, EmbeddingError } from "../errors.js";
|
|
4
|
+
// Length of the embedding vectors: returned by getDimensions() and used as the
// hidden size when mean-pooling the model output.
const DIMENSIONS = 384;
|
|
5
|
+
/**
 * Embedding provider that runs a local ONNX model through `onnxruntime-node`.
 *
 * The inference session and the tokenizer are created lazily on first use.
 * Each text is tokenized, run through the model, mean-pooled over the tokens
 * selected by the attention mask, and scaled to unit length.
 */
export class OnnxEmbeddingProvider {
    /** Loaded inference session; null until first use and after close(). */
    session = null;
    /** In-flight session load shared by concurrent callers; null when idle. */
    sessionLoad = null;
    /** Tokenizer instance; null until first use. */
    tokenizer = null;
    modelName;
    modelManager;
    /**
     * @param modelName Name of the model to load.
     * @param modelManager Resolver for the model and tokenizer file paths; a
     *   default ModelManager is created when omitted.
     */
    constructor(modelName = DEFAULT_MODEL_NAME, modelManager) {
        this.modelName = modelName;
        this.modelManager = modelManager ?? new ModelManager();
    }
    /**
     * Returns the inference session, creating it on first use.
     *
     * Callers that arrive while the session is still loading share the same
     * pending load, so only one session is ever created per load attempt.
     *
     * @throws EmbeddingNotAvailableError when the model files are missing.
     * @throws EmbeddingError when the runtime or the model fails to load.
     */
    async getSession() {
        if (this.session)
            return this.session;
        if (!this.modelManager.isModelAvailable(this.modelName)) {
            throw new EmbeddingNotAvailableError(`Model "${this.modelName}" not found. Run 'knowledgine init' to download the model automatically.`);
        }
        if (!this.sessionLoad) {
            const load = this.loadSession();
            // Forget the pending load once it settles so that a failed attempt
            // can be retried by a later call.
            const clear = () => {
                if (this.sessionLoad === load)
                    this.sessionLoad = null;
            };
            load.then(clear, clear);
            this.sessionLoad = load;
        }
        return this.sessionLoad;
    }
    /** Imports the ONNX runtime, creates a CPU session for the model and stores it. */
    async loadSession() {
        try {
            const ort = await import("onnxruntime-node");
            const session = await ort.InferenceSession.create(this.modelManager.getModelPath(this.modelName), { executionProviders: ["cpu"] });
            this.session = session;
            return session;
        }
        catch (error) {
            throw new EmbeddingError(`Failed to load ONNX model "${this.modelName}"`, error instanceof Error ? error : new Error(String(error)));
        }
    }
    /**
     * Returns the tokenizer, creating it on first use.
     *
     * @throws EmbeddingNotAvailableError when the model files are missing.
     */
    getTokenizer() {
        if (this.tokenizer)
            return this.tokenizer;
        if (!this.modelManager.isModelAvailable(this.modelName)) {
            throw new EmbeddingNotAvailableError(`Model "${this.modelName}" not found. Run 'knowledgine init' to download the model automatically.`);
        }
        this.tokenizer = new WordPieceTokenizer(this.modelManager.getTokenizerPath(this.modelName));
        return this.tokenizer;
    }
    /** Embeds a single text by delegating to embedBatch. */
    async embed(text) {
        const [result] = await this.embedBatch([text]);
        return result;
    }
    /**
     * Embeds each text in turn and returns the vectors in input order.
     *
     * @throws EmbeddingNotAvailableError when the model files are missing.
     * @throws EmbeddingError when loading or inference fails, including when
     *   the model output does not have the expected size.
     */
    async embedBatch(texts) {
        const session = await this.getSession();
        const tokenizer = this.getTokenizer();
        try {
            const ort = await import("onnxruntime-node");
            const results = [];
            for (const text of texts) {
                const encoded = tokenizer.encode(text);
                const seqLen = encoded.inputIds.length;
                // Every input is an int64 tensor of shape [1, seqLen] (batch of one).
                const toTensor = (values) => new ort.Tensor("int64", BigInt64Array.from(values.map(BigInt)), [1, seqLen]);
                const feeds = {
                    input_ids: toTensor(encoded.inputIds),
                    attention_mask: toTensor(encoded.attentionMask),
                    token_type_ids: toTensor(encoded.tokenTypeIds),
                };
                const output = await session.run(feeds);
                // Prefer the output named "last_hidden_state"; otherwise fall back
                // to the first output the model reports.
                const outputKey = Object.keys(output)[0];
                const lastHiddenState = output["last_hidden_state"] ?? output[outputKey];
                const data = lastHiddenState.data;
                const embedding = this.meanPool(data, encoded.attentionMask, seqLen, DIMENSIONS);
                results.push(this.normalize(embedding));
            }
            return results;
        }
        catch (error) {
            if (error instanceof EmbeddingNotAvailableError || error instanceof EmbeddingError) {
                throw error;
            }
            throw new EmbeddingError("ONNX inference failed", error instanceof Error ? error : new Error(String(error)));
        }
    }
    /** Length of the vectors produced by this provider. */
    getDimensions() {
        return DIMENSIONS;
    }
    /** Releases the inference session if one has been created. */
    async close() {
        if (this.session) {
            await this.session.release();
            this.session = null;
        }
    }
    /**
     * Averages the token vectors whose attention-mask entry is non-zero.
     *
     * @param data Flat buffer read as `seqLen` consecutive rows of `hiddenSize` values.
     * @param attentionMask One entry per token; tokens with entry 0 are skipped.
     * @param seqLen Number of token rows in `data`.
     * @param hiddenSize Number of values per token row.
     * @returns A new Float32Array of length `hiddenSize` (all zeros when every token is masked).
     * @throws RangeError when `data` does not hold exactly `seqLen * hiddenSize` values.
     */
    meanPool(data, attentionMask, seqLen, hiddenSize) {
        const expected = seqLen * hiddenSize;
        if (data.length !== expected) {
            // Reading rows with the wrong stride would silently produce wrong or
            // NaN vectors, so reject output whose size does not match.
            throw new RangeError(`Model output has ${data.length} values; expected ${seqLen} x ${hiddenSize} = ${expected}`);
        }
        const pooled = new Float32Array(hiddenSize);
        let count = 0;
        for (let i = 0; i < seqLen; i++) {
            if (attentionMask[i] === 0)
                continue;
            count++;
            const rowStart = i * hiddenSize;
            for (let j = 0; j < hiddenSize; j++) {
                pooled[j] += data[rowStart + j];
            }
        }
        if (count > 0) {
            for (let j = 0; j < hiddenSize; j++) {
                pooled[j] /= count;
            }
        }
        return pooled;
    }
    /**
     * Scales a vector to unit Euclidean length.
     *
     * @returns A new Float32Array, or the input itself when its norm is 0.
     */
    normalize(vec) {
        let norm = 0;
        for (const v of vec) {
            norm += v * v;
        }
        norm = Math.sqrt(norm);
        if (norm === 0)
            return vec;
        const result = new Float32Array(vec.length);
        for (let i = 0; i < vec.length; i++) {
            result[i] = vec[i] / norm;
        }
        return result;
    }
}
|
|
121
|
+
//# sourceMappingURL=onnx-embedding-provider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"onnx-embedding-provider.js","sourceRoot":"","sources":["../../src/embedding/onnx-embedding-provider.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACtE,OAAO,EAAE,0BAA0B,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE1E,MAAM,UAAU,GAAG,GAAG,CAAC;AAEvB,MAAM,OAAO,qBAAqB;IACxB,OAAO,GAA4B,IAAI,CAAC;IACxC,SAAS,GAA8B,IAAI,CAAC;IAC5C,SAAS,CAAS;IAClB,YAAY,CAAe;IAEnC,YAAY,YAAoB,kBAAkB,EAAE,YAA2B;QAC7E,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,YAAY,IAAI,IAAI,YAAY,EAAE,CAAC;IACzD,CAAC;IAEO,KAAK,CAAC,UAAU;QACtB,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC,OAAO,CAAC;QAEtC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YACxD,MAAM,IAAI,0BAA0B,CAClC,UAAU,IAAI,CAAC,SAAS,0EAA0E,CACnG,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;YAC7C,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,EAC9C,EAAE,kBAAkB,EAAE,CAAC,KAAK,CAAC,EAAE,CAChC,CAAC;YACF,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,cAAc,CACtB,8BAA8B,IAAI,CAAC,SAAS,GAAG,EAC/C,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAC1D,CAAC;QACJ,CAAC;IACH,CAAC;IAEO,YAAY;QAClB,IAAI,IAAI,CAAC,SAAS;YAAE,OAAO,IAAI,CAAC,SAAS,CAAC;QAC1C,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YACxD,MAAM,IAAI,0BAA0B,CAClC,UAAU,IAAI,CAAC,SAAS,0EAA0E,CACnG,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,SAAS,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;QAC5F,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,MAAM,CAAC,MAAM,CAAC,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAC/C,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QACxC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QAEtC,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;YAC7
C,MAAM,OAAO,GAAmB,EAAE,CAAC;YAEnC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;gBACvC,MAAM,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAEvC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE;oBACzF,CAAC;oBACD,MAAM;iBACP,CAAC,CAAC;gBACH,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,MAAM,CAClC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EACrD,CAAC,CAAC,EAAE,MAAM,CAAC,CACZ,CAAC;gBACF,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,MAAM,CACjC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EACpD,CAAC,CAAC,EAAE,MAAM,CAAC,CACZ,CAAC;gBAEF,MAAM,KAAK,GAA2B;oBACpC,SAAS,EAAE,QAAQ;oBACnB,cAAc,EAAE,aAAa;oBAC7B,cAAc,EAAE,YAAY;iBAC7B,CAAC;gBACF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAExC,yDAAyD;gBACzD,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,MAAM,eAAe,GAAG,MAAM,CAAC,mBAAmB,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,CAAC;gBACzE,MAAM,IAAI,GAAG,eAAe,CAAC,IAAoB,CAAC;gBAClD,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;gBACjF,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC;YAC1C,CAAC;YAED,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,0BAA0B,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;gBACnF,MAAM,KAAK,CAAC;YACd,CAAC;YACD,MAAM,IAAI,cAAc,CACtB,uBAAuB,EACvB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAC1D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,aAAa;QACX,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;IAEO,QAAQ,CACd,IAAkB,EAClB,aAAuB,EACvB,MAAc,EACd,UAAkB;QAElB,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,UAAU,CAAC,CAAC;QAC5C,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,IAAI,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC;gBAAE,SAAS;YACrC,KAAK,EAAE,CAAC;YA
CR,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC;YACxC,CAAC;QACH,CAAC;QAED,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,MAAM,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC;YACrB,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,SAAS,CAAC,GAAiB;QACjC,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;YACpB,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC;QAChB,CAAC;QACD,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,IAAI,IAAI,KAAK,CAAC;YAAE,OAAO,GAAG,CAAC;QAC3B,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;QAC5B,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Value returned by WordPieceTokenizer.encode: three number arrays describing
 * one encoded text.
 */
interface EncodingResult {
|
|
2
|
+
/** Token ids; presumably indices into the tokenizer vocabulary — confirm against the implementation. */
inputIds: number[];
|
|
3
|
+
/** Per-token mask; presumably 1 for real tokens and 0 for padding — confirm against the implementation. */
attentionMask: number[];
|
|
4
|
+
/** Per-token segment ids; value semantics are not visible here — confirm against the implementation. */
tokenTypeIds: number[];
|
|
5
|
+
}
|
|
6
|
+
/**
 * Type declarations for the tokenizer that turns text into an EncodingResult.
 *
 * NOTE(review): the private members suggest a vocabulary plus ids for the
 * unknown/CLS/SEP/PAD special tokens; the implementation is not visible from
 * this declaration, so confirm before relying on that.
 */
export declare class WordPieceTokenizer {
|
|
7
|
+
private vocab;
|
|
8
|
+
private unkId;
|
|
9
|
+
private clsId;
|
|
10
|
+
private sepId;
|
|
11
|
+
private padId;
|
|
12
|
+
/** @param tokenizerJsonPath Path to the tokenizer JSON file. */
constructor(tokenizerJsonPath: string);
|
|
13
|
+
/** Encodes `text` into input ids, attention mask and token type ids. */
encode(text: string): EncodingResult;
|
|
14
|
+
private lookupToken;
|
|
15
|
+
private tokenize;
|
|
16
|
+
private wordpieceTokenize;
|
|
17
|
+
}
|
|
18
|
+
export {};
|
|
19
|
+
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/embedding/tokenizer.ts"],"names":[],"mappings":"AASA,UAAU,cAAc;IACtB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAQD,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,KAAK,CAAsB;IACnC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,KAAK,CAAS;gBAEV,iBAAiB,EAAE,MAAM;IAWrC,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,cAAc;IAsBpC,OAAO,CAAC,WAAW;IAInB,OAAO,CAAC,QAAQ;IAUhB,OAAO,CAAC,iBAAiB;CAiC1B"}
|