sonamu 0.7.8 → 0.7.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/bin/cli.js +6 -2
  2. package/dist/database/base-model.d.ts +47 -2
  3. package/dist/database/base-model.d.ts.map +1 -1
  4. package/dist/database/base-model.js +87 -5
  5. package/dist/entity/entity-manager.d.ts +5 -5
  6. package/dist/entity/entity.d.ts +9 -0
  7. package/dist/entity/entity.d.ts.map +1 -1
  8. package/dist/entity/entity.js +16 -1
  9. package/dist/migration/code-generation.d.ts.map +1 -1
  10. package/dist/migration/code-generation.js +12 -9
  11. package/dist/migration/migration-set.js +3 -1
  12. package/dist/migration/postgresql-schema-reader.d.ts.map +1 -1
  13. package/dist/migration/postgresql-schema-reader.js +3 -2
  14. package/dist/template/implementations/generated.template.d.ts.map +1 -1
  15. package/dist/template/implementations/generated.template.js +3 -2
  16. package/dist/types/types.d.ts +30 -25
  17. package/dist/types/types.d.ts.map +1 -1
  18. package/dist/types/types.js +10 -7
  19. package/dist/vector/config.d.ts.map +1 -1
  20. package/dist/vector/config.js +2 -2
  21. package/dist/vector/embedding.d.ts +12 -8
  22. package/dist/vector/embedding.d.ts.map +1 -1
  23. package/dist/vector/embedding.js +59 -74
  24. package/dist/vector/vector-search.js +2 -2
  25. package/package.json +16 -9
  26. package/src/database/base-model.ts +132 -7
  27. package/src/entity/entity.ts +19 -0
  28. package/src/migration/code-generation.ts +15 -8
  29. package/src/migration/migration-set.ts +2 -0
  30. package/src/migration/postgresql-schema-reader.ts +1 -0
  31. package/src/template/implementations/generated.template.ts +3 -4
  32. package/src/types/types.ts +12 -6
  33. package/src/vector/config.ts +2 -4
  34. package/src/vector/embedding.ts +73 -104
  35. package/src/vector/vector-search.ts +1 -1
@@ -148,6 +148,7 @@ class PostgreSQLSchemaReaderClass {
148
148
  sortOrder: idx.sort_order,
149
149
  })),
150
150
  nullsNotDistinct: firstIndex.nulls_not_distinct,
151
+ using: firstIndex.index_type as "btree" | "hash" | "gin" | "gist" | undefined,
151
152
  };
152
153
  });
153
154
 
@@ -4,7 +4,7 @@ import { Sonamu } from "../../api";
4
4
  import type { Entity } from "../../entity/entity";
5
5
  import { EntityManager } from "../../entity/entity-manager";
6
6
  import { Naite } from "../../naite/naite";
7
- import { type EntityPropNode, isVirtualProp } from "../../types/types";
7
+ import { type EntityIndex, type EntityPropNode, isVirtualProp } from "../../types/types";
8
8
  import { nonNullable } from "../../utils/utils";
9
9
  import { Template } from "../template";
10
10
  import { propNodeToZodTypeDef, zodTypeToZodCode } from "../zod-converter";
@@ -168,9 +168,8 @@ export class Template__generated extends Template {
168
168
  })();
169
169
 
170
170
  // fulltext index에 포함된 컬럼들 추출
171
- const fulltextColumns = unique(
172
- entity.indexes.filter((index) => index.type === "fulltext").flatMap((index) => index.columns),
173
- );
171
+ // TODO: GIN/GiST 인덱스 생성된 컬럼 추출
172
+ const fulltextColumns: EntityIndex["columns"][] = [];
174
173
 
175
174
  // virtual props
176
175
  const virtualProps = entity.props
@@ -113,6 +113,9 @@ export type VectorArrayProp = CommonProp & {
113
113
  type: "vector[]";
114
114
  dimensions: number;
115
115
  };
116
+ export type TsVectorProp = CommonProp & {
117
+ type: "tsvector";
118
+ };
116
119
  export type RelationType = "HasMany" | "BelongsToOne" | "ManyToMany" | "OneToOne";
117
120
  export type RelationOn = "CASCADE" | "SET NULL" | "NO ACTION" | "SET DEFAULT" | "RESTRICT";
118
121
  type _RelationProp = {
@@ -185,6 +188,7 @@ export type EntityProp =
185
188
  | VirtualProp
186
189
  | VectorProp
187
190
  | VectorArrayProp
191
+ | TsVectorProp
188
192
  | RelationProp;
189
193
 
190
194
  /**
@@ -220,10 +224,10 @@ type EntityIndexColumn = {
220
224
  vectorOps?: VectorOps;
221
225
  };
222
226
  export type EntityIndex = {
223
- type: "index" | "unique" | "fulltext" | "hnsw" | "ivfflat";
227
+ type: "index" | "unique" | "hnsw" | "ivfflat";
224
228
  columns: EntityIndexColumn[];
225
229
  name: string;
226
- parser?: "built-in" | "ngram";
230
+ using?: "btree" | "hash" | "gin" | "gist";
227
231
  nullsNotDistinct?: boolean; // unique index only
228
232
  /**
229
233
  * HNSW (Hierarchical Navigable Small World) 인덱스: 각 노드의 최대 연결 수
@@ -547,10 +551,10 @@ export type MigrationColumn = {
547
551
  generated?: GeneratedColumn;
548
552
  };
549
553
  export type MigrationIndex = {
550
- type: "unique" | "index" | "fulltext" | "hnsw" | "ivfflat";
554
+ type: "unique" | "index" | "hnsw" | "ivfflat";
551
555
  columns: EntityIndexColumn[];
552
556
  name: string;
553
- parser?: "built-in" | "ngram";
557
+ using?: "btree" | "hash" | "gin" | "gist";
554
558
  nullsNotDistinct?: boolean;
555
559
  /** HNSW (Hierarchical Navigable Small World): 각 노드의 최대 연결 수 */
556
560
  m?: number;
@@ -842,6 +846,7 @@ const BasePropFieldsWithoutAdditional = z
842
846
  z.literal("date[]"),
843
847
  z.literal("uuid"),
844
848
  z.literal("uuid[]"),
849
+ z.literal("tsvector"),
845
850
  ]),
846
851
  })
847
852
  .strict();
@@ -1038,6 +1043,7 @@ const NormalPropTypes = [
1038
1043
  "virtual",
1039
1044
  "vector",
1040
1045
  "vector[]",
1046
+ "tsvector",
1041
1047
  ] as const;
1042
1048
 
1043
1049
  // VIRTUAL Generated Column에서 사용 불가능한 타입들
@@ -1131,10 +1137,10 @@ const EntityIndexColumnSchema = z.object({
1131
1137
  // EntityIndex 스키마 정의
1132
1138
  const EntityIndexSchema = z
1133
1139
  .object({
1134
- type: z.enum(["index", "unique", "fulltext", "hnsw", "ivfflat"]),
1140
+ type: z.enum(["index", "unique", "hnsw", "ivfflat"]),
1135
1141
  columns: z.array(EntityIndexColumnSchema),
1136
1142
  name: z.string().min(1).max(63),
1137
- parser: z.enum(["built-in", "ngram"]).optional(),
1143
+ using: z.enum(["btree", "hash", "gin", "gist"]).optional(),
1138
1144
  nullsNotDistinct: z.boolean().optional(),
1139
1145
  m: z.number().optional(),
1140
1146
  efConstruction: z.number().optional(),
@@ -13,7 +13,7 @@ export const DEFAULT_VECTOR_CONFIG: VectorConfig = {
13
13
  model: "voyage-3",
14
14
  dimensions: 1024,
15
15
  maxTokens: 32000,
16
- batchSize: 100,
16
+ batchSize: 128,
17
17
  },
18
18
 
19
19
  // OpenAI 설정
@@ -55,9 +55,7 @@ export const DEFAULT_VECTOR_CONFIG: VectorConfig = {
55
55
  * 설정 생성 헬퍼 함수
56
56
  * 부분 설정만 제공하면 나머지는 기본값 사용
57
57
  */
58
- export function createVectorConfig(
59
- overrides: Partial<VectorConfig> = {}
60
- ): VectorConfig {
58
+ export function createVectorConfig(overrides: Partial<VectorConfig> = {}): VectorConfig {
61
59
  return {
62
60
  voyage: { ...DEFAULT_VECTOR_CONFIG.voyage, ...overrides.voyage },
63
61
  openai: { ...DEFAULT_VECTOR_CONFIG.openai, ...overrides.openai },
@@ -1,3 +1,6 @@
1
+ import { createOpenAI, type OpenAIProvider } from "@ai-sdk/openai";
2
+ import { type EmbeddingModel, embedMany } from "ai";
3
+ import { VoyageAIClient } from "voyageai";
1
4
  import { Sonamu } from "../api/sonamu";
2
5
  import { DEFAULT_VECTOR_CONFIG } from "./config";
3
6
  import type {
@@ -10,7 +13,7 @@ import type {
10
13
 
11
14
  /**
12
15
  * 임베딩 클라이언트
13
- * Voyage AI와 OpenAI 임베딩을 통합 지원
16
+ * Voyage AI와 OpenAI 임베딩을 SDK 방식으로 통합 지원
14
17
  */
15
18
  export class Embedding {
16
19
  private config: VectorConfig;
@@ -25,22 +28,64 @@ export class Embedding {
25
28
  };
26
29
  }
27
30
 
31
+ /**
32
+ * Voyage AI 클라이언트 초기화
33
+ */
34
+ private getVoyageClient(): VoyageAIClient {
35
+ const apiKey = Sonamu.secrets?.voyage_api_key ?? process.env.VOYAGE_API_KEY;
36
+ if (!apiKey) {
37
+ throw new Error("VOYAGE_API_KEY가 설정되지 않았습니다. 환경변수를 확인하세요.");
38
+ }
39
+ return new VoyageAIClient({ apiKey });
40
+ }
41
+
42
+ /**
43
+ * OpenAI provider 생성
44
+ */
45
+ private getOpenAIProvider(): OpenAIProvider {
46
+ const apiKey = Sonamu.secrets?.openai_api_key ?? process.env.OPENAI_API_KEY;
47
+ if (!apiKey) {
48
+ throw new Error("OPENAI_API_KEY가 설정되지 않았습니다. 환경변수를 확인하세요.");
49
+ }
50
+ return createOpenAI({ apiKey });
51
+ }
52
+
28
53
  /**
29
54
  * 텍스트 임베딩 생성
30
- * @param texts - 임베딩할 텍스트 배열
55
+ * @param texts - 임베딩할 텍스트 배열 (batchSize이상 시 자동 분할)
31
56
  * @param provider - 'voyage' | 'openai'
32
57
  * @param inputType - 'document' | 'query' (Voyage AI만 해당)
58
+ * @param onProgress - 진행률 콜백
33
59
  */
34
60
  async embed(
35
61
  texts: string[],
36
62
  provider: EmbeddingProvider,
37
- inputType: VectorInputType = "document"
63
+ inputType: VectorInputType = "document",
64
+ onProgress?: ProgressCallback,
38
65
  ): Promise<EmbeddingResult[]> {
39
- if (provider === "voyage") {
40
- return this.embedVoyage(texts, inputType);
41
- } else {
42
- return this.embedOpenAI(texts);
66
+ const maxBatchSize =
67
+ provider === "voyage" ? this.config.voyage.batchSize : this.config.openai.batchSize;
68
+
69
+ // batchSize이하면 바로 호출
70
+ if (texts.length <= maxBatchSize) {
71
+ return provider === "voyage"
72
+ ? await this.embedVoyage(texts, inputType)
73
+ : await this.embedOpenAI(texts);
43
74
  }
75
+
76
+ // batchSize이상이면 자동으로 나눠서 처리
77
+ const batches = Array.from({ length: Math.ceil(texts.length / maxBatchSize) }, (_, i) =>
78
+ texts.slice(i * maxBatchSize, (i + 1) * maxBatchSize),
79
+ );
80
+
81
+ const results = await Promise.all(
82
+ batches.map((batch) =>
83
+ provider === "voyage" ? this.embedVoyage(batch, inputType) : this.embedOpenAI(batch),
84
+ ),
85
+ );
86
+
87
+ onProgress?.(texts.length, texts.length);
88
+ return results.flat();
44
89
  }
45
90
 
46
91
  /**
@@ -49,7 +94,7 @@ export class Embedding {
49
94
  async embedOne(
50
95
  text: string,
51
96
  provider: EmbeddingProvider,
52
- inputType: VectorInputType = "document"
97
+ inputType: VectorInputType = "document",
53
98
  ): Promise<EmbeddingResult> {
54
99
  const results = await this.embed([text], provider, inputType);
55
100
  return results[0];
@@ -60,42 +105,24 @@ export class Embedding {
60
105
  */
61
106
  private async embedVoyage(
62
107
  texts: string[],
63
- inputType: VectorInputType
108
+ inputType: VectorInputType,
64
109
  ): Promise<EmbeddingResult[]> {
110
+ const client = this.getVoyageClient();
65
111
  const voyageConfig = this.config.voyage;
66
112
 
67
- // config에서 설정된 apiKey 우선, 없으면 Sonamu.secrets에서 로드
68
- const apiKey = voyageConfig.apiKey || Sonamu.secrets?.voyage_api_key;
69
- if (!apiKey) {
70
- throw new Error(
71
- "VOYAGE_API_KEY가 설정되지 않았습니다. 환경변수를 확인하세요."
72
- );
73
- }
74
-
75
- const response = await fetch(voyageConfig.baseUrl, {
76
- method: "POST",
77
- headers: {
78
- "Content-Type": "application/json",
79
- Authorization: `Bearer ${apiKey}`,
80
- },
81
- body: JSON.stringify({
82
- input: texts,
83
- model: voyageConfig.model,
84
- input_type: inputType,
85
- }),
113
+ const response = await client.embed({
114
+ input: texts,
115
+ model: voyageConfig.model,
116
+ inputType: inputType,
86
117
  });
87
-
88
- if (!response.ok) {
89
- const error = await response.text();
90
- throw new Error(`Voyage API error: ${response.status} - ${error}`);
118
+ if (!response.data) {
119
+ throw new Error("Voyage API: 응답 데이터가 없습니다.");
91
120
  }
92
121
 
93
- const data = await response.json();
94
-
95
- return data.data.map((item: { embedding: number[] }) => ({
96
- embedding: item.embedding,
122
+ return response.data.map((item) => ({
123
+ embedding: item.embedding ?? [],
97
124
  model: voyageConfig.model,
98
- tokenCount: data.usage?.total_tokens || 0,
125
+ tokenCount: response.usage?.totalTokens ?? 0,
99
126
  }));
100
127
  }
101
128
 
@@ -103,74 +130,22 @@ export class Embedding {
103
130
  * OpenAI 임베딩
104
131
  */
105
132
  private async embedOpenAI(texts: string[]): Promise<EmbeddingResult[]> {
133
+ const openai = this.getOpenAIProvider();
106
134
  const openaiConfig = this.config.openai;
135
+ const model = openai.embeddingModel(openaiConfig.model);
107
136
 
108
- // config에서 설정된 apiKey 우선, 없으면 Sonamu.secrets에서 로드
109
- const apiKey = openaiConfig.apiKey || Sonamu.secrets?.openai_api_key;
110
- if (!apiKey) {
111
- throw new Error(
112
- "OPENAI_API_KEY가 설정되지 않았습니다. 환경변수를 확인하세요."
113
- );
114
- }
115
-
116
- const response = await fetch(openaiConfig.baseUrl, {
117
- method: "POST",
118
- headers: {
119
- "Content-Type": "application/json",
120
- Authorization: `Bearer ${apiKey}`,
121
- },
122
- body: JSON.stringify({
123
- input: texts,
124
- model: openaiConfig.model,
125
- }),
137
+ const { embeddings, usage } = await embedMany({
138
+ model: model as EmbeddingModel,
139
+ values: texts,
126
140
  });
127
141
 
128
- if (!response.ok) {
129
- const error = await response.text();
130
- throw new Error(`OpenAI API error: ${response.status} - ${error}`);
131
- }
132
-
133
- const data = await response.json();
134
-
135
- return data.data.map((item: { embedding: number[] }) => ({
136
- embedding: item.embedding,
142
+ return embeddings.map((embedding) => ({
143
+ embedding,
137
144
  model: openaiConfig.model,
138
- tokenCount: data.usage?.total_tokens || 0,
145
+ tokenCount: usage?.tokens ?? 0,
139
146
  }));
140
147
  }
141
148
 
142
- /**
143
- * 배치 임베딩 (대량 처리)
144
- */
145
- async embedBatch(
146
- texts: string[],
147
- provider: EmbeddingProvider,
148
- inputType: VectorInputType = "document",
149
- onProgress?: ProgressCallback
150
- ): Promise<EmbeddingResult[]> {
151
- const batchSize =
152
- provider === "voyage"
153
- ? this.config.voyage.batchSize
154
- : this.config.openai.batchSize;
155
-
156
- const results: EmbeddingResult[] = [];
157
-
158
- for (let i = 0; i < texts.length; i += batchSize) {
159
- const batch = texts.slice(i, i + batchSize);
160
- const batchResults = await this.embed(batch, provider, inputType);
161
- results.push(...batchResults);
162
-
163
- onProgress?.(Math.min(i + batchSize, texts.length), texts.length);
164
-
165
- // Rate limiting (100ms between batches)
166
- if (i + batchSize < texts.length) {
167
- await this.delay(100);
168
- }
169
- }
170
-
171
- return results;
172
- }
173
-
174
149
  /**
175
150
  * 벡터를 PostgreSQL vector 타입 문자열로 변환
176
151
  */
@@ -182,12 +157,6 @@ export class Embedding {
182
157
  * 임베딩 provider의 차원 수 반환
183
158
  */
184
159
  getDimensions(provider: EmbeddingProvider): number {
185
- return provider === "voyage"
186
- ? this.config.voyage.dimensions
187
- : this.config.openai.dimensions;
188
- }
189
-
190
- private delay(ms: number): Promise<void> {
191
- return new Promise((resolve) => setTimeout(resolve, ms));
160
+ return provider === "voyage" ? this.config.voyage.dimensions : this.config.openai.dimensions;
192
161
  }
193
162
  }
@@ -64,7 +64,7 @@ export class VectorSearch<T = Record<string, unknown>> {
64
64
  onProgress?: ProgressCallback,
65
65
  ): Promise<void> {
66
66
  const texts = items.map((item) => item.text);
67
- const embeddings = await this.embedding.embedBatch(texts, provider, "document", onProgress);
67
+ const embeddings = await this.embedding.embed(texts, provider, "document", onProgress);
68
68
 
69
69
  await this.db.transaction(async (trx) => {
70
70
  for (let i = 0; i < items.length; i++) {