sonamu 0.7.8 → 0.7.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +6 -2
- package/dist/database/base-model.d.ts +47 -2
- package/dist/database/base-model.d.ts.map +1 -1
- package/dist/database/base-model.js +87 -5
- package/dist/entity/entity-manager.d.ts +5 -5
- package/dist/entity/entity.d.ts +9 -0
- package/dist/entity/entity.d.ts.map +1 -1
- package/dist/entity/entity.js +16 -1
- package/dist/migration/code-generation.d.ts.map +1 -1
- package/dist/migration/code-generation.js +12 -9
- package/dist/migration/migration-set.js +3 -1
- package/dist/migration/postgresql-schema-reader.d.ts.map +1 -1
- package/dist/migration/postgresql-schema-reader.js +3 -2
- package/dist/template/implementations/generated.template.d.ts.map +1 -1
- package/dist/template/implementations/generated.template.js +3 -2
- package/dist/types/types.d.ts +30 -25
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/types.js +10 -7
- package/dist/vector/config.d.ts.map +1 -1
- package/dist/vector/config.js +2 -2
- package/dist/vector/embedding.d.ts +12 -8
- package/dist/vector/embedding.d.ts.map +1 -1
- package/dist/vector/embedding.js +59 -74
- package/dist/vector/vector-search.js +2 -2
- package/package.json +16 -9
- package/src/database/base-model.ts +132 -7
- package/src/entity/entity.ts +19 -0
- package/src/migration/code-generation.ts +15 -8
- package/src/migration/migration-set.ts +2 -0
- package/src/migration/postgresql-schema-reader.ts +1 -0
- package/src/template/implementations/generated.template.ts +3 -4
- package/src/types/types.ts +12 -6
- package/src/vector/config.ts +2 -4
- package/src/vector/embedding.ts +73 -104
- package/src/vector/vector-search.ts +1 -1
|
@@ -4,7 +4,7 @@ import { Sonamu } from "../../api";
|
|
|
4
4
|
import type { Entity } from "../../entity/entity";
|
|
5
5
|
import { EntityManager } from "../../entity/entity-manager";
|
|
6
6
|
import { Naite } from "../../naite/naite";
|
|
7
|
-
import { type EntityPropNode, isVirtualProp } from "../../types/types";
|
|
7
|
+
import { type EntityIndex, type EntityPropNode, isVirtualProp } from "../../types/types";
|
|
8
8
|
import { nonNullable } from "../../utils/utils";
|
|
9
9
|
import { Template } from "../template";
|
|
10
10
|
import { propNodeToZodTypeDef, zodTypeToZodCode } from "../zod-converter";
|
|
@@ -168,9 +168,8 @@ export class Template__generated extends Template {
|
|
|
168
168
|
})();
|
|
169
169
|
|
|
170
170
|
// fulltext index에 포함된 컬럼들 추출
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
);
|
|
171
|
+
// TODO: GIN/GiST 인덱스 생성된 컬럼 추출
|
|
172
|
+
const fulltextColumns: EntityIndex["columns"][] = [];
|
|
174
173
|
|
|
175
174
|
// virtual props
|
|
176
175
|
const virtualProps = entity.props
|
package/src/types/types.ts
CHANGED
|
@@ -113,6 +113,9 @@ export type VectorArrayProp = CommonProp & {
|
|
|
113
113
|
type: "vector[]";
|
|
114
114
|
dimensions: number;
|
|
115
115
|
};
|
|
116
|
+
export type TsVectorProp = CommonProp & {
|
|
117
|
+
type: "tsvector";
|
|
118
|
+
};
|
|
116
119
|
export type RelationType = "HasMany" | "BelongsToOne" | "ManyToMany" | "OneToOne";
|
|
117
120
|
export type RelationOn = "CASCADE" | "SET NULL" | "NO ACTION" | "SET DEFAULT" | "RESTRICT";
|
|
118
121
|
type _RelationProp = {
|
|
@@ -185,6 +188,7 @@ export type EntityProp =
|
|
|
185
188
|
| VirtualProp
|
|
186
189
|
| VectorProp
|
|
187
190
|
| VectorArrayProp
|
|
191
|
+
| TsVectorProp
|
|
188
192
|
| RelationProp;
|
|
189
193
|
|
|
190
194
|
/**
|
|
@@ -220,10 +224,10 @@ type EntityIndexColumn = {
|
|
|
220
224
|
vectorOps?: VectorOps;
|
|
221
225
|
};
|
|
222
226
|
export type EntityIndex = {
|
|
223
|
-
type: "index" | "unique" | "
|
|
227
|
+
type: "index" | "unique" | "hnsw" | "ivfflat";
|
|
224
228
|
columns: EntityIndexColumn[];
|
|
225
229
|
name: string;
|
|
226
|
-
|
|
230
|
+
using?: "btree" | "hash" | "gin" | "gist";
|
|
227
231
|
nullsNotDistinct?: boolean; // unique index only
|
|
228
232
|
/**
|
|
229
233
|
* HNSW (Hierarchical Navigable Small World) 인덱스: 각 노드의 최대 연결 수
|
|
@@ -547,10 +551,10 @@ export type MigrationColumn = {
|
|
|
547
551
|
generated?: GeneratedColumn;
|
|
548
552
|
};
|
|
549
553
|
export type MigrationIndex = {
|
|
550
|
-
type: "unique" | "index" | "
|
|
554
|
+
type: "unique" | "index" | "hnsw" | "ivfflat";
|
|
551
555
|
columns: EntityIndexColumn[];
|
|
552
556
|
name: string;
|
|
553
|
-
|
|
557
|
+
using?: "btree" | "hash" | "gin" | "gist";
|
|
554
558
|
nullsNotDistinct?: boolean;
|
|
555
559
|
/** HNSW (Hierarchical Navigable Small World): 각 노드의 최대 연결 수 */
|
|
556
560
|
m?: number;
|
|
@@ -842,6 +846,7 @@ const BasePropFieldsWithoutAdditional = z
|
|
|
842
846
|
z.literal("date[]"),
|
|
843
847
|
z.literal("uuid"),
|
|
844
848
|
z.literal("uuid[]"),
|
|
849
|
+
z.literal("tsvector"),
|
|
845
850
|
]),
|
|
846
851
|
})
|
|
847
852
|
.strict();
|
|
@@ -1038,6 +1043,7 @@ const NormalPropTypes = [
|
|
|
1038
1043
|
"virtual",
|
|
1039
1044
|
"vector",
|
|
1040
1045
|
"vector[]",
|
|
1046
|
+
"tsvector",
|
|
1041
1047
|
] as const;
|
|
1042
1048
|
|
|
1043
1049
|
// VIRTUAL Generated Column에서 사용 불가능한 타입들
|
|
@@ -1131,10 +1137,10 @@ const EntityIndexColumnSchema = z.object({
|
|
|
1131
1137
|
// EntityIndex 스키마 정의
|
|
1132
1138
|
const EntityIndexSchema = z
|
|
1133
1139
|
.object({
|
|
1134
|
-
type: z.enum(["index", "unique", "
|
|
1140
|
+
type: z.enum(["index", "unique", "hnsw", "ivfflat"]),
|
|
1135
1141
|
columns: z.array(EntityIndexColumnSchema),
|
|
1136
1142
|
name: z.string().min(1).max(63),
|
|
1137
|
-
|
|
1143
|
+
using: z.enum(["btree", "hash", "gin", "gist"]).optional(),
|
|
1138
1144
|
nullsNotDistinct: z.boolean().optional(),
|
|
1139
1145
|
m: z.number().optional(),
|
|
1140
1146
|
efConstruction: z.number().optional(),
|
package/src/vector/config.ts
CHANGED
|
@@ -13,7 +13,7 @@ export const DEFAULT_VECTOR_CONFIG: VectorConfig = {
|
|
|
13
13
|
model: "voyage-3",
|
|
14
14
|
dimensions: 1024,
|
|
15
15
|
maxTokens: 32000,
|
|
16
|
-
batchSize:
|
|
16
|
+
batchSize: 128,
|
|
17
17
|
},
|
|
18
18
|
|
|
19
19
|
// OpenAI 설정
|
|
@@ -55,9 +55,7 @@ export const DEFAULT_VECTOR_CONFIG: VectorConfig = {
|
|
|
55
55
|
* 설정 생성 헬퍼 함수
|
|
56
56
|
* 부분 설정만 제공하면 나머지는 기본값 사용
|
|
57
57
|
*/
|
|
58
|
-
export function createVectorConfig(
|
|
59
|
-
overrides: Partial<VectorConfig> = {}
|
|
60
|
-
): VectorConfig {
|
|
58
|
+
export function createVectorConfig(overrides: Partial<VectorConfig> = {}): VectorConfig {
|
|
61
59
|
return {
|
|
62
60
|
voyage: { ...DEFAULT_VECTOR_CONFIG.voyage, ...overrides.voyage },
|
|
63
61
|
openai: { ...DEFAULT_VECTOR_CONFIG.openai, ...overrides.openai },
|
package/src/vector/embedding.ts
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import { createOpenAI, type OpenAIProvider } from "@ai-sdk/openai";
|
|
2
|
+
import { type EmbeddingModel, embedMany } from "ai";
|
|
3
|
+
import { VoyageAIClient } from "voyageai";
|
|
1
4
|
import { Sonamu } from "../api/sonamu";
|
|
2
5
|
import { DEFAULT_VECTOR_CONFIG } from "./config";
|
|
3
6
|
import type {
|
|
@@ -10,7 +13,7 @@ import type {
|
|
|
10
13
|
|
|
11
14
|
/**
|
|
12
15
|
* 임베딩 클라이언트
|
|
13
|
-
* Voyage AI와 OpenAI 임베딩을 통합 지원
|
|
16
|
+
* Voyage AI와 OpenAI 임베딩을 SDK 방식으로 통합 지원
|
|
14
17
|
*/
|
|
15
18
|
export class Embedding {
|
|
16
19
|
private config: VectorConfig;
|
|
@@ -25,22 +28,64 @@ export class Embedding {
|
|
|
25
28
|
};
|
|
26
29
|
}
|
|
27
30
|
|
|
31
|
+
/**
|
|
32
|
+
* Voyage AI 클라이언트 초기화
|
|
33
|
+
*/
|
|
34
|
+
private getVoyageClient(): VoyageAIClient {
|
|
35
|
+
const apiKey = Sonamu.secrets?.voyage_api_key ?? process.env.VOYAGE_API_KEY;
|
|
36
|
+
if (!apiKey) {
|
|
37
|
+
throw new Error("VOYAGE_API_KEY가 설정되지 않았습니다. 환경변수를 확인하세요.");
|
|
38
|
+
}
|
|
39
|
+
return new VoyageAIClient({ apiKey });
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* OpenAI provider 생성
|
|
44
|
+
*/
|
|
45
|
+
private getOpenAIProvider(): OpenAIProvider {
|
|
46
|
+
const apiKey = Sonamu.secrets?.openai_api_key ?? process.env.OPENAI_API_KEY;
|
|
47
|
+
if (!apiKey) {
|
|
48
|
+
throw new Error("OPENAI_API_KEY가 설정되지 않았습니다. 환경변수를 확인하세요.");
|
|
49
|
+
}
|
|
50
|
+
return createOpenAI({ apiKey });
|
|
51
|
+
}
|
|
52
|
+
|
|
28
53
|
/**
|
|
29
54
|
* 텍스트 임베딩 생성
|
|
30
|
-
* @param texts - 임베딩할 텍스트 배열
|
|
55
|
+
* @param texts - 임베딩할 텍스트 배열 (batchSize이상 시 자동 분할)
|
|
31
56
|
* @param provider - 'voyage' | 'openai'
|
|
32
57
|
* @param inputType - 'document' | 'query' (Voyage AI만 해당)
|
|
58
|
+
* @param onProgress - 진행률 콜백
|
|
33
59
|
*/
|
|
34
60
|
async embed(
|
|
35
61
|
texts: string[],
|
|
36
62
|
provider: EmbeddingProvider,
|
|
37
|
-
inputType: VectorInputType = "document"
|
|
63
|
+
inputType: VectorInputType = "document",
|
|
64
|
+
onProgress?: ProgressCallback,
|
|
38
65
|
): Promise<EmbeddingResult[]> {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
66
|
+
const maxBatchSize =
|
|
67
|
+
provider === "voyage" ? this.config.voyage.batchSize : this.config.openai.batchSize;
|
|
68
|
+
|
|
69
|
+
// batchSize이하면 바로 호출
|
|
70
|
+
if (texts.length <= maxBatchSize) {
|
|
71
|
+
return provider === "voyage"
|
|
72
|
+
? await this.embedVoyage(texts, inputType)
|
|
73
|
+
: await this.embedOpenAI(texts);
|
|
43
74
|
}
|
|
75
|
+
|
|
76
|
+
// batchSize이상이면 자동으로 나눠서 처리
|
|
77
|
+
const batches = Array.from({ length: Math.ceil(texts.length / maxBatchSize) }, (_, i) =>
|
|
78
|
+
texts.slice(i * maxBatchSize, (i + 1) * maxBatchSize),
|
|
79
|
+
);
|
|
80
|
+
|
|
81
|
+
const results = await Promise.all(
|
|
82
|
+
batches.map((batch) =>
|
|
83
|
+
provider === "voyage" ? this.embedVoyage(batch, inputType) : this.embedOpenAI(batch),
|
|
84
|
+
),
|
|
85
|
+
);
|
|
86
|
+
|
|
87
|
+
onProgress?.(texts.length, texts.length);
|
|
88
|
+
return results.flat();
|
|
44
89
|
}
|
|
45
90
|
|
|
46
91
|
/**
|
|
@@ -49,7 +94,7 @@ export class Embedding {
|
|
|
49
94
|
async embedOne(
|
|
50
95
|
text: string,
|
|
51
96
|
provider: EmbeddingProvider,
|
|
52
|
-
inputType: VectorInputType = "document"
|
|
97
|
+
inputType: VectorInputType = "document",
|
|
53
98
|
): Promise<EmbeddingResult> {
|
|
54
99
|
const results = await this.embed([text], provider, inputType);
|
|
55
100
|
return results[0];
|
|
@@ -60,42 +105,24 @@ export class Embedding {
|
|
|
60
105
|
*/
|
|
61
106
|
private async embedVoyage(
|
|
62
107
|
texts: string[],
|
|
63
|
-
inputType: VectorInputType
|
|
108
|
+
inputType: VectorInputType,
|
|
64
109
|
): Promise<EmbeddingResult[]> {
|
|
110
|
+
const client = this.getVoyageClient();
|
|
65
111
|
const voyageConfig = this.config.voyage;
|
|
66
112
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
"VOYAGE_API_KEY가 설정되지 않았습니다. 환경변수를 확인하세요."
|
|
72
|
-
);
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
const response = await fetch(voyageConfig.baseUrl, {
|
|
76
|
-
method: "POST",
|
|
77
|
-
headers: {
|
|
78
|
-
"Content-Type": "application/json",
|
|
79
|
-
Authorization: `Bearer ${apiKey}`,
|
|
80
|
-
},
|
|
81
|
-
body: JSON.stringify({
|
|
82
|
-
input: texts,
|
|
83
|
-
model: voyageConfig.model,
|
|
84
|
-
input_type: inputType,
|
|
85
|
-
}),
|
|
113
|
+
const response = await client.embed({
|
|
114
|
+
input: texts,
|
|
115
|
+
model: voyageConfig.model,
|
|
116
|
+
inputType: inputType,
|
|
86
117
|
});
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
const error = await response.text();
|
|
90
|
-
throw new Error(`Voyage API error: ${response.status} - ${error}`);
|
|
118
|
+
if (!response.data) {
|
|
119
|
+
throw new Error("Voyage API: 응답 데이터가 없습니다.");
|
|
91
120
|
}
|
|
92
121
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
return data.data.map((item: { embedding: number[] }) => ({
|
|
96
|
-
embedding: item.embedding,
|
|
122
|
+
return response.data.map((item) => ({
|
|
123
|
+
embedding: item.embedding ?? [],
|
|
97
124
|
model: voyageConfig.model,
|
|
98
|
-
tokenCount:
|
|
125
|
+
tokenCount: response.usage?.totalTokens ?? 0,
|
|
99
126
|
}));
|
|
100
127
|
}
|
|
101
128
|
|
|
@@ -103,74 +130,22 @@ export class Embedding {
|
|
|
103
130
|
* OpenAI 임베딩
|
|
104
131
|
*/
|
|
105
132
|
private async embedOpenAI(texts: string[]): Promise<EmbeddingResult[]> {
|
|
133
|
+
const openai = this.getOpenAIProvider();
|
|
106
134
|
const openaiConfig = this.config.openai;
|
|
135
|
+
const model = openai.embeddingModel(openaiConfig.model);
|
|
107
136
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
throw new Error(
|
|
112
|
-
"OPENAI_API_KEY가 설정되지 않았습니다. 환경변수를 확인하세요."
|
|
113
|
-
);
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
const response = await fetch(openaiConfig.baseUrl, {
|
|
117
|
-
method: "POST",
|
|
118
|
-
headers: {
|
|
119
|
-
"Content-Type": "application/json",
|
|
120
|
-
Authorization: `Bearer ${apiKey}`,
|
|
121
|
-
},
|
|
122
|
-
body: JSON.stringify({
|
|
123
|
-
input: texts,
|
|
124
|
-
model: openaiConfig.model,
|
|
125
|
-
}),
|
|
137
|
+
const { embeddings, usage } = await embedMany({
|
|
138
|
+
model: model as EmbeddingModel,
|
|
139
|
+
values: texts,
|
|
126
140
|
});
|
|
127
141
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
throw new Error(`OpenAI API error: ${response.status} - ${error}`);
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
const data = await response.json();
|
|
134
|
-
|
|
135
|
-
return data.data.map((item: { embedding: number[] }) => ({
|
|
136
|
-
embedding: item.embedding,
|
|
142
|
+
return embeddings.map((embedding) => ({
|
|
143
|
+
embedding,
|
|
137
144
|
model: openaiConfig.model,
|
|
138
|
-
tokenCount:
|
|
145
|
+
tokenCount: usage?.tokens ?? 0,
|
|
139
146
|
}));
|
|
140
147
|
}
|
|
141
148
|
|
|
142
|
-
/**
|
|
143
|
-
* 배치 임베딩 (대량 처리)
|
|
144
|
-
*/
|
|
145
|
-
async embedBatch(
|
|
146
|
-
texts: string[],
|
|
147
|
-
provider: EmbeddingProvider,
|
|
148
|
-
inputType: VectorInputType = "document",
|
|
149
|
-
onProgress?: ProgressCallback
|
|
150
|
-
): Promise<EmbeddingResult[]> {
|
|
151
|
-
const batchSize =
|
|
152
|
-
provider === "voyage"
|
|
153
|
-
? this.config.voyage.batchSize
|
|
154
|
-
: this.config.openai.batchSize;
|
|
155
|
-
|
|
156
|
-
const results: EmbeddingResult[] = [];
|
|
157
|
-
|
|
158
|
-
for (let i = 0; i < texts.length; i += batchSize) {
|
|
159
|
-
const batch = texts.slice(i, i + batchSize);
|
|
160
|
-
const batchResults = await this.embed(batch, provider, inputType);
|
|
161
|
-
results.push(...batchResults);
|
|
162
|
-
|
|
163
|
-
onProgress?.(Math.min(i + batchSize, texts.length), texts.length);
|
|
164
|
-
|
|
165
|
-
// Rate limiting (100ms between batches)
|
|
166
|
-
if (i + batchSize < texts.length) {
|
|
167
|
-
await this.delay(100);
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
return results;
|
|
172
|
-
}
|
|
173
|
-
|
|
174
149
|
/**
|
|
175
150
|
* 벡터를 PostgreSQL vector 타입 문자열로 변환
|
|
176
151
|
*/
|
|
@@ -182,12 +157,6 @@ export class Embedding {
|
|
|
182
157
|
* 임베딩 provider의 차원 수 반환
|
|
183
158
|
*/
|
|
184
159
|
getDimensions(provider: EmbeddingProvider): number {
|
|
185
|
-
return provider === "voyage"
|
|
186
|
-
? this.config.voyage.dimensions
|
|
187
|
-
: this.config.openai.dimensions;
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
private delay(ms: number): Promise<void> {
|
|
191
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
160
|
+
return provider === "voyage" ? this.config.voyage.dimensions : this.config.openai.dimensions;
|
|
192
161
|
}
|
|
193
162
|
}
|
|
@@ -64,7 +64,7 @@ export class VectorSearch<T = Record<string, unknown>> {
|
|
|
64
64
|
onProgress?: ProgressCallback,
|
|
65
65
|
): Promise<void> {
|
|
66
66
|
const texts = items.map((item) => item.text);
|
|
67
|
-
const embeddings = await this.embedding.
|
|
67
|
+
const embeddings = await this.embedding.embed(texts, provider, "document", onProgress);
|
|
68
68
|
|
|
69
69
|
await this.db.transaction(async (trx) => {
|
|
70
70
|
for (let i = 0; i < items.length; i++) {
|