sonamu 0.7.11 → 0.7.12

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (118):
  1. package/dist/api/config.d.ts +10 -3
  2. package/dist/api/config.d.ts.map +1 -1
  3. package/dist/api/config.js +2 -1
  4. package/dist/api/sonamu.d.ts +4 -0
  5. package/dist/api/sonamu.d.ts.map +1 -1
  6. package/dist/api/sonamu.js +36 -2
  7. package/dist/bin/cli.js +121 -117
  8. package/dist/database/base-model.d.ts +10 -50
  9. package/dist/database/base-model.d.ts.map +1 -1
  10. package/dist/database/base-model.js +19 -84
  11. package/dist/database/base-model.types.d.ts +4 -4
  12. package/dist/database/base-model.types.d.ts.map +1 -1
  13. package/dist/database/base-model.types.js +1 -1
  14. package/dist/database/db.d.ts +1 -0
  15. package/dist/database/db.d.ts.map +1 -1
  16. package/dist/database/db.js +24 -13
  17. package/dist/database/puri-subset.test-d.js +1 -1
  18. package/dist/database/puri-subset.types.d.ts +1 -0
  19. package/dist/database/puri-subset.types.d.ts.map +1 -1
  20. package/dist/database/puri-subset.types.js +2 -2
  21. package/dist/database/puri.d.ts +82 -3
  22. package/dist/database/puri.d.ts.map +1 -1
  23. package/dist/database/puri.js +180 -14
  24. package/dist/database/puri.types.d.ts +33 -6
  25. package/dist/database/puri.types.d.ts.map +1 -1
  26. package/dist/database/puri.types.js +1 -1
  27. package/dist/database/puri.types.test-d.js +1 -1
  28. package/dist/entity/entity-manager.d.ts +5 -4
  29. package/dist/entity/entity-manager.d.ts.map +1 -1
  30. package/dist/entity/entity-manager.js +8 -1
  31. package/dist/index.d.ts +1 -1
  32. package/dist/index.d.ts.map +1 -1
  33. package/dist/index.js +3 -3
  34. package/dist/migration/code-generation.d.ts.map +1 -1
  35. package/dist/migration/code-generation.js +33 -2
  36. package/dist/migration/postgresql-schema-reader.d.ts.map +1 -1
  37. package/dist/migration/postgresql-schema-reader.js +53 -22
  38. package/dist/naite/messaging-types.d.ts.map +1 -1
  39. package/dist/naite/messaging-types.js +1 -1
  40. package/dist/naite/naite.js +2 -2
  41. package/dist/stream/sse.d.ts +2 -6
  42. package/dist/stream/sse.d.ts.map +1 -1
  43. package/dist/stream/sse.js +9 -3
  44. package/dist/syncer/api-parser.js +5 -1
  45. package/dist/syncer/file-patterns.d.ts +1 -1
  46. package/dist/syncer/file-patterns.d.ts.map +1 -1
  47. package/dist/syncer/file-patterns.js +6 -5
  48. package/dist/syncer/module-loader.d.ts +5 -0
  49. package/dist/syncer/module-loader.d.ts.map +1 -1
  50. package/dist/syncer/module-loader.js +17 -1
  51. package/dist/syncer/syncer.d.ts +3 -0
  52. package/dist/syncer/syncer.d.ts.map +1 -1
  53. package/dist/syncer/syncer.js +12 -2
  54. package/dist/tasks/decorator.d.ts +26 -0
  55. package/dist/tasks/decorator.d.ts.map +1 -0
  56. package/dist/tasks/decorator.js +28 -0
  57. package/dist/tasks/step-wrapper.d.ts +18 -0
  58. package/dist/tasks/step-wrapper.d.ts.map +1 -0
  59. package/dist/tasks/step-wrapper.js +38 -0
  60. package/dist/tasks/workflow-manager.d.ts +40 -0
  61. package/dist/tasks/workflow-manager.d.ts.map +1 -0
  62. package/dist/tasks/workflow-manager.js +193 -0
  63. package/dist/template/implementations/generated.template.d.ts.map +1 -1
  64. package/dist/template/implementations/generated.template.js +7 -3
  65. package/dist/types/types.d.ts +27 -11
  66. package/dist/types/types.d.ts.map +1 -1
  67. package/dist/types/types.js +15 -2
  68. package/dist/utils/formatter.d.ts.map +1 -1
  69. package/dist/utils/formatter.js +10 -2
  70. package/dist/utils/model.d.ts +9 -2
  71. package/dist/utils/model.d.ts.map +1 -1
  72. package/dist/utils/model.js +16 -1
  73. package/dist/utils/type-utils.d.ts.map +1 -1
  74. package/dist/utils/type-utils.js +3 -1
  75. package/dist/vector/embedding.d.ts +2 -5
  76. package/dist/vector/embedding.d.ts.map +1 -1
  77. package/dist/vector/embedding.js +3 -7
  78. package/dist/vector/types.d.ts.map +1 -1
  79. package/dist/vector/types.js +1 -1
  80. package/package.json +5 -3
  81. package/src/api/config.ts +15 -8
  82. package/src/api/sonamu.ts +43 -2
  83. package/src/bin/cli.ts +58 -54
  84. package/src/database/base-model.ts +21 -128
  85. package/src/database/base-model.types.ts +3 -4
  86. package/src/database/db.ts +28 -18
  87. package/src/database/puri-subset.test-d.ts +1 -0
  88. package/src/database/puri-subset.types.ts +2 -0
  89. package/src/database/puri.ts +238 -27
  90. package/src/database/puri.types.test-d.ts +1 -1
  91. package/src/database/puri.types.ts +49 -6
  92. package/src/entity/entity-manager.ts +9 -0
  93. package/src/index.ts +1 -1
  94. package/src/migration/code-generation.ts +40 -1
  95. package/src/migration/postgresql-schema-reader.ts +53 -22
  96. package/src/naite/messaging-types.ts +43 -44
  97. package/src/naite/naite.ts +1 -1
  98. package/src/shared/app.shared.ts.txt +13 -0
  99. package/src/shared/web.shared.ts.txt +13 -0
  100. package/src/stream/sse.ts +15 -3
  101. package/src/syncer/api-parser.ts +4 -0
  102. package/src/syncer/file-patterns.ts +11 -9
  103. package/src/syncer/module-loader.ts +35 -0
  104. package/src/syncer/syncer.ts +14 -0
  105. package/src/tasks/decorator.ts +71 -0
  106. package/src/tasks/step-wrapper.ts +84 -0
  107. package/src/tasks/workflow-manager.ts +330 -0
  108. package/src/template/implementations/generated.template.ts +19 -6
  109. package/src/types/types.ts +20 -4
  110. package/src/utils/formatter.ts +8 -1
  111. package/src/utils/model.ts +26 -2
  112. package/src/utils/type-utils.ts +2 -0
  113. package/src/vector/embedding.ts +2 -8
  114. package/src/vector/types.ts +1 -2
  115. package/dist/vector/vector-search.d.ts +0 -47
  116. package/dist/vector/vector-search.d.ts.map +0 -1
  117. package/dist/vector/vector-search.js +0 -176
  118. package/src/vector/vector-search.ts +0 -261
@@ -15,7 +15,7 @@ import type {
15
15
  * 임베딩 클라이언트
16
16
  * Voyage AI와 OpenAI 임베딩을 SDK 방식으로 통합 지원
17
17
  */
18
- export class Embedding {
18
+ export class EmbeddingClass {
19
19
  private config: VectorConfig;
20
20
 
21
21
  constructor(config: Partial<VectorConfig> = {}) {
@@ -146,13 +146,6 @@ export class Embedding {
146
146
  }));
147
147
  }
148
148
 
149
- /**
150
- * 벡터를 PostgreSQL vector 타입 문자열로 변환
151
- */
152
- static toVectorString(embedding: number[]): string {
153
- return `[${embedding.join(",")}]`;
154
- }
155
-
156
149
  /**
157
150
  * 임베딩 provider의 차원 수 반환
158
151
  */
@@ -160,3 +153,4 @@ export class Embedding {
160
153
  return provider === "voyage" ? this.config.voyage.dimensions : this.config.openai.dimensions;
161
154
  }
162
155
  }
156
+ export const Embedding = new EmbeddingClass();
@@ -31,8 +31,7 @@ export interface VectorSearchResult<T = Record<string, unknown>> {
31
31
  }
32
32
 
33
33
  /** 하이브리드 검색 결과 (Vector + FTS) */
34
- export interface HybridSearchResult<T = Record<string, unknown>>
35
- extends VectorSearchResult<T> {
34
+ export interface HybridSearchResult<T = Record<string, unknown>> extends VectorSearchResult<T> {
36
35
  vectorScore?: number;
37
36
  ftsScore?: number;
38
37
  }
@@ -1,47 +0,0 @@
1
- import type { Knex } from "knex";
2
- import { Embedding } from "./embedding";
3
- import type { EmbeddingItem, EmbeddingProvider, HybridSearchOptions, HybridSearchResult, ProgressCallback, VectorConfig, VectorSearchOptions, VectorSearchResult } from "./types";
4
- /**
5
- * 벡터 검색
6
- * pgvector를 활용한 벡터 검색 및 하이브리드 검색 지원
7
- */
8
- export declare class VectorSearch<T = Record<string, unknown>> {
9
- private db;
10
- private config;
11
- private embedding;
12
- private tableName;
13
- constructor(db: Knex, tableName: string, config?: Partial<VectorConfig>);
14
- /**
15
- * 단일 항목에 임베딩 저장
16
- */
17
- saveEmbedding(id: number, text: string, provider: EmbeddingProvider, embeddingColumn?: string): Promise<void>;
18
- /**
19
- * 여러 항목에 임베딩 일괄 저장
20
- */
21
- saveEmbeddingsBatch(items: EmbeddingItem[], provider: EmbeddingProvider, embeddingColumn?: string, onProgress?: ProgressCallback): Promise<void>;
22
- /**
23
- * 벡터 검색 (코사인 유사도)
24
- */
25
- search(query: string, provider: EmbeddingProvider, options?: VectorSearchOptions): Promise<VectorSearchResult<T>[]>;
26
- /**
27
- * 하이브리드 검색 (Vector + FTS)
28
- */
29
- hybridSearch(query: string, provider: EmbeddingProvider, options?: HybridSearchOptions): Promise<HybridSearchResult<T>[]>;
30
- /**
31
- * 임베딩 현황 조회
32
- */
33
- getEmbeddingStatus(embeddingColumn?: string): Promise<{
34
- total: number;
35
- withEmbedding: number;
36
- withoutEmbedding: number;
37
- }>;
38
- /**
39
- * 임베딩이 없는 항목 ID 조회
40
- */
41
- getItemsWithoutEmbedding(embeddingColumn?: string, limit?: number): Promise<number[]>;
42
- /**
43
- * Embedding 인스턴스 반환 (고급 사용)
44
- */
45
- getEmbedding(): Embedding;
46
- }
47
- //# sourceMappingURL=vector-search.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"vector-search.d.ts","sourceRoot":"","sources":["../../src/vector/vector-search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAGjC,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,KAAK,EACV,aAAa,EACb,iBAAiB,EACjB,mBAAmB,EACnB,kBAAkB,EAClB,gBAAgB,EAChB,YAAY,EACZ,mBAAmB,EACnB,kBAAkB,EACnB,MAAM,SAAS,CAAC;AAEjB;;;GAGG;AACH,qBAAa,YAAY,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;IACnD,OAAO,CAAC,EAAE,CAAO;IACjB,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,SAAS,CAAS;gBAEd,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,GAAE,OAAO,CAAC,YAAY,CAAM;IAa3E;;OAEG;IACG,aAAa,CACjB,EAAE,EAAE,MAAM,EACV,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,iBAAiB,EAC3B,eAAe,GAAE,MAA4B,GAC5C,OAAO,CAAC,IAAI,CAAC;IAUhB;;OAEG;IACG,mBAAmB,CACvB,KAAK,EAAE,aAAa,EAAE,EACtB,QAAQ,EAAE,iBAAiB,EAC3B,eAAe,GAAE,MAA4B,EAC7C,UAAU,CAAC,EAAE,gBAAgB,GAC5B,OAAO,CAAC,IAAI,CAAC;IAehB;;OAEG;IACG,MAAM,CACV,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,iBAAiB,EAC3B,OAAO,GAAE,mBAAwB,GAChC,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC;IAyCnC;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,iBAAiB,EAC3B,OAAO,GAAE,mBAAwB,GAChC,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,CAAC;IAiFnC;;OAEG;IACG,kBAAkB,CAAC,eAAe,GAAE,MAA4B,GAAG,OAAO,CAAC;QAC/E,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,MAAM,CAAC;QACtB,gBAAgB,EAAE,MAAM,CAAC;KAC1B,CAAC;IAgBF;;OAEG;IACG,wBAAwB,CAC5B,eAAe,GAAE,MAA4B,EAC7C,KAAK,GAAE,MAAY,GAClB,OAAO,CAAC,MAAM,EAAE,CAAC;IAUpB;;OAEG;IACH,YAAY,IAAI,SAAS;CAG1B"}
@@ -1,176 +0,0 @@
1
- import pgvector from "pgvector/knex";
2
- import { DEFAULT_VECTOR_CONFIG } from "./config.js";
3
- import { Embedding } from "./embedding.js";
4
- /**
5
- * 벡터 검색
6
- * pgvector를 활용한 벡터 검색 및 하이브리드 검색 지원
7
- */ export class VectorSearch {
8
- db;
9
- config;
10
- embedding;
11
- tableName;
12
- constructor(db, tableName, config = {}){
13
- this.db = db;
14
- this.tableName = tableName;
15
- this.config = {
16
- voyage: {
17
- ...DEFAULT_VECTOR_CONFIG.voyage,
18
- ...config.voyage
19
- },
20
- openai: {
21
- ...DEFAULT_VECTOR_CONFIG.openai,
22
- ...config.openai
23
- },
24
- chunking: {
25
- ...DEFAULT_VECTOR_CONFIG.chunking,
26
- ...config.chunking
27
- },
28
- search: {
29
- ...DEFAULT_VECTOR_CONFIG.search,
30
- ...config.search
31
- },
32
- pgvector: {
33
- ...DEFAULT_VECTOR_CONFIG.pgvector,
34
- ...config.pgvector
35
- }
36
- };
37
- this.embedding = new Embedding(config);
38
- }
39
- /**
40
- * 단일 항목에 임베딩 저장
41
- */ async saveEmbedding(id, text, provider, embeddingColumn = "content_embedding") {
42
- const { embedding } = await this.embedding.embedOne(text, provider, "document");
43
- await this.db(this.tableName).where("id", id).update({
44
- [embeddingColumn]: pgvector.toSql(embedding)
45
- });
46
- }
47
- /**
48
- * 여러 항목에 임베딩 일괄 저장
49
- */ async saveEmbeddingsBatch(items, provider, embeddingColumn = "content_embedding", onProgress) {
50
- const texts = items.map((item)=>item.text);
51
- const embeddings = await this.embedding.embed(texts, provider, "document", onProgress);
52
- await this.db.transaction(async (trx)=>{
53
- for(let i = 0; i < items.length; i++){
54
- await trx(this.tableName).where("id", items[i].id).update({
55
- [embeddingColumn]: pgvector.toSql(embeddings[i].embedding)
56
- });
57
- }
58
- });
59
- }
60
- /**
61
- * 벡터 검색 (코사인 유사도)
62
- */ async search(query, provider, options = {}) {
63
- const { embeddingColumn = "content_embedding", limit = this.config.search.defaultLimit, threshold = this.config.search.similarityThreshold, where } = options;
64
- // 쿼리 임베딩 (input_type: 'query' 중요!)
65
- const { embedding } = await this.embedding.embedOne(query, provider, "query");
66
- // pgvector 세션 설정
67
- if (this.config.pgvector.iterativeScan) {
68
- await this.db.raw("SET hnsw.iterative_scan = relaxed_order");
69
- }
70
- await this.db.raw(`SET hnsw.ef_search = ${this.config.pgvector.efSearch}`);
71
- // 코사인 유사도 = 1 - 코사인 거리
72
- const vectorStr = pgvector.toSql(embedding);
73
- let queryBuilder = this.db(this.tableName).select("*").select(this.db.raw(`1 - (${embeddingColumn} <=> ?::vector) AS similarity`, [
74
- vectorStr
75
- ])).whereNotNull(embeddingColumn).orderByRaw(`${embeddingColumn} <=> ?::vector`, [
76
- vectorStr
77
- ]).limit(limit);
78
- if (where) {
79
- queryBuilder = queryBuilder.whereRaw(where);
80
- }
81
- const rows = await queryBuilder;
82
- return rows.filter((row)=>row.similarity >= threshold).map((row)=>({
83
- id: row.id,
84
- similarity: parseFloat(String(row.similarity)),
85
- data: row
86
- }));
87
- }
88
- /**
89
- * 하이브리드 검색 (Vector + FTS)
90
- */ async hybridSearch(query, provider, options = {}) {
91
- const { embeddingColumn = "content_embedding", ftsColumn = "content_tsv", limit = this.config.search.defaultLimit, vectorWeight = this.config.search.vectorWeight, ftsWeight = this.config.search.ftsWeight } = options;
92
- const { embedding } = await this.embedding.embedOne(query, provider, "query");
93
- const vectorStr = pgvector.toSql(embedding);
94
- // pgvector 세션 설정
95
- if (this.config.pgvector.iterativeScan) {
96
- await this.db.raw("SET hnsw.iterative_scan = relaxed_order");
97
- }
98
- await this.db.raw(`SET hnsw.ef_search = ${this.config.pgvector.efSearch}`);
99
- const sql = `
100
- WITH vector_search AS (
101
- SELECT
102
- id,
103
- ROW_NUMBER() OVER (ORDER BY ${embeddingColumn} <=> ?::vector) AS rank
104
- FROM ${this.tableName}
105
- WHERE ${embeddingColumn} IS NOT NULL
106
- ORDER BY ${embeddingColumn} <=> ?::vector
107
- LIMIT 50
108
- ),
109
- fts_search AS (
110
- SELECT
111
- id,
112
- ROW_NUMBER() OVER (ORDER BY ts_rank(${ftsColumn}, query) DESC) AS rank
113
- FROM ${this.tableName}, plainto_tsquery('simple', ?) query
114
- WHERE ${ftsColumn} @@ query
115
- LIMIT 50
116
- ),
117
- combined AS (
118
- SELECT
119
- COALESCE(v.id, f.id) AS id,
120
- COALESCE(1.0 / (60 + v.rank), 0) AS vector_score,
121
- COALESCE(1.0 / (60 + f.rank), 0) AS fts_score
122
- FROM vector_search v
123
- FULL OUTER JOIN fts_search f ON v.id = f.id
124
- )
125
- SELECT
126
- t.*,
127
- c.vector_score,
128
- c.fts_score,
129
- (c.vector_score * ? + c.fts_score * ?) AS similarity
130
- FROM combined c
131
- JOIN ${this.tableName} t ON c.id = t.id
132
- ORDER BY similarity DESC
133
- LIMIT ?
134
- `;
135
- const { rows } = await this.db.raw(sql, [
136
- vectorStr,
137
- vectorStr,
138
- query,
139
- vectorWeight,
140
- ftsWeight,
141
- limit
142
- ]);
143
- return rows.map((row)=>({
144
- id: row.id,
145
- similarity: parseFloat(String(row.similarity)),
146
- vectorScore: parseFloat(String(row.vector_score)),
147
- ftsScore: parseFloat(String(row.fts_score)),
148
- data: row
149
- }));
150
- }
151
- /**
152
- * 임베딩 현황 조회
153
- */ async getEmbeddingStatus(embeddingColumn = "content_embedding") {
154
- const result = await this.db(this.tableName).count("* as total").count(`${embeddingColumn} as with_embedding`).first();
155
- const total = parseInt(String(result?.total ?? 0), 10);
156
- const withEmbedding = parseInt(String(result?.with_embedding ?? 0), 10);
157
- return {
158
- total,
159
- withEmbedding,
160
- withoutEmbedding: total - withEmbedding
161
- };
162
- }
163
- /**
164
- * 임베딩이 없는 항목 ID 조회
165
- */ async getItemsWithoutEmbedding(embeddingColumn = "content_embedding", limit = 100) {
166
- const rows = await this.db(this.tableName).select("id").whereNull(embeddingColumn).orderBy("id").limit(limit);
167
- return rows.map((row)=>row.id);
168
- }
169
- /**
170
- * Embedding 인스턴스 반환 (고급 사용)
171
- */ getEmbedding() {
172
- return this.embedding;
173
- }
174
- }
175
-
176
- //# sourceMappingURL=data:application/json;base64,
@@ -1,261 +0,0 @@
1
- import type { Knex } from "knex";
2
- import pgvector from "pgvector/knex";
3
- import { DEFAULT_VECTOR_CONFIG } from "./config";
4
- import { Embedding } from "./embedding";
5
- import type {
6
- EmbeddingItem,
7
- EmbeddingProvider,
8
- HybridSearchOptions,
9
- HybridSearchResult,
10
- ProgressCallback,
11
- VectorConfig,
12
- VectorSearchOptions,
13
- VectorSearchResult,
14
- } from "./types";
15
-
16
- /**
17
- * 벡터 검색
18
- * pgvector를 활용한 벡터 검색 및 하이브리드 검색 지원
19
- */
20
- export class VectorSearch<T = Record<string, unknown>> {
21
- private db: Knex;
22
- private config: VectorConfig;
23
- private embedding: Embedding;
24
- private tableName: string;
25
-
26
- constructor(db: Knex, tableName: string, config: Partial<VectorConfig> = {}) {
27
- this.db = db;
28
- this.tableName = tableName;
29
- this.config = {
30
- voyage: { ...DEFAULT_VECTOR_CONFIG.voyage, ...config.voyage },
31
- openai: { ...DEFAULT_VECTOR_CONFIG.openai, ...config.openai },
32
- chunking: { ...DEFAULT_VECTOR_CONFIG.chunking, ...config.chunking },
33
- search: { ...DEFAULT_VECTOR_CONFIG.search, ...config.search },
34
- pgvector: { ...DEFAULT_VECTOR_CONFIG.pgvector, ...config.pgvector },
35
- };
36
- this.embedding = new Embedding(config);
37
- }
38
-
39
- /**
40
- * 단일 항목에 임베딩 저장
41
- */
42
- async saveEmbedding(
43
- id: number,
44
- text: string,
45
- provider: EmbeddingProvider,
46
- embeddingColumn: string = "content_embedding",
47
- ): Promise<void> {
48
- const { embedding } = await this.embedding.embedOne(text, provider, "document");
49
-
50
- await this.db(this.tableName)
51
- .where("id", id)
52
- .update({
53
- [embeddingColumn]: pgvector.toSql(embedding),
54
- });
55
- }
56
-
57
- /**
58
- * 여러 항목에 임베딩 일괄 저장
59
- */
60
- async saveEmbeddingsBatch(
61
- items: EmbeddingItem[],
62
- provider: EmbeddingProvider,
63
- embeddingColumn: string = "content_embedding",
64
- onProgress?: ProgressCallback,
65
- ): Promise<void> {
66
- const texts = items.map((item) => item.text);
67
- const embeddings = await this.embedding.embed(texts, provider, "document", onProgress);
68
-
69
- await this.db.transaction(async (trx) => {
70
- for (let i = 0; i < items.length; i++) {
71
- await trx(this.tableName)
72
- .where("id", items[i].id)
73
- .update({
74
- [embeddingColumn]: pgvector.toSql(embeddings[i].embedding),
75
- });
76
- }
77
- });
78
- }
79
-
80
- /**
81
- * 벡터 검색 (코사인 유사도)
82
- */
83
- async search(
84
- query: string,
85
- provider: EmbeddingProvider,
86
- options: VectorSearchOptions = {},
87
- ): Promise<VectorSearchResult<T>[]> {
88
- const {
89
- embeddingColumn = "content_embedding",
90
- limit = this.config.search.defaultLimit,
91
- threshold = this.config.search.similarityThreshold,
92
- where,
93
- } = options;
94
-
95
- // 쿼리 임베딩 (input_type: 'query' 중요!)
96
- const { embedding } = await this.embedding.embedOne(query, provider, "query");
97
-
98
- // pgvector 세션 설정
99
- if (this.config.pgvector.iterativeScan) {
100
- await this.db.raw("SET hnsw.iterative_scan = relaxed_order");
101
- }
102
- await this.db.raw(`SET hnsw.ef_search = ${this.config.pgvector.efSearch}`);
103
-
104
- // 코사인 유사도 = 1 - 코사인 거리
105
- const vectorStr = pgvector.toSql(embedding);
106
- let queryBuilder = this.db(this.tableName)
107
- .select("*")
108
- .select(this.db.raw(`1 - (${embeddingColumn} <=> ?::vector) AS similarity`, [vectorStr]))
109
- .whereNotNull(embeddingColumn)
110
- .orderByRaw(`${embeddingColumn} <=> ?::vector`, [vectorStr])
111
- .limit(limit);
112
-
113
- if (where) {
114
- queryBuilder = queryBuilder.whereRaw(where);
115
- }
116
-
117
- const rows = await queryBuilder;
118
-
119
- return rows
120
- .filter((row: { similarity: number }) => row.similarity >= threshold)
121
- .map((row: T & { similarity: number }) => ({
122
- id: (row as unknown as { id: number }).id,
123
- similarity: parseFloat(String(row.similarity)),
124
- data: row as T,
125
- }));
126
- }
127
-
128
- /**
129
- * 하이브리드 검색 (Vector + FTS)
130
- */
131
- async hybridSearch(
132
- query: string,
133
- provider: EmbeddingProvider,
134
- options: HybridSearchOptions = {},
135
- ): Promise<HybridSearchResult<T>[]> {
136
- const {
137
- embeddingColumn = "content_embedding",
138
- ftsColumn = "content_tsv",
139
- limit = this.config.search.defaultLimit,
140
- vectorWeight = this.config.search.vectorWeight,
141
- ftsWeight = this.config.search.ftsWeight,
142
- } = options;
143
-
144
- const { embedding } = await this.embedding.embedOne(query, provider, "query");
145
- const vectorStr = pgvector.toSql(embedding);
146
-
147
- // pgvector 세션 설정
148
- if (this.config.pgvector.iterativeScan) {
149
- await this.db.raw("SET hnsw.iterative_scan = relaxed_order");
150
- }
151
- await this.db.raw(`SET hnsw.ef_search = ${this.config.pgvector.efSearch}`);
152
-
153
- const sql = `
154
- WITH vector_search AS (
155
- SELECT
156
- id,
157
- ROW_NUMBER() OVER (ORDER BY ${embeddingColumn} <=> ?::vector) AS rank
158
- FROM ${this.tableName}
159
- WHERE ${embeddingColumn} IS NOT NULL
160
- ORDER BY ${embeddingColumn} <=> ?::vector
161
- LIMIT 50
162
- ),
163
- fts_search AS (
164
- SELECT
165
- id,
166
- ROW_NUMBER() OVER (ORDER BY ts_rank(${ftsColumn}, query) DESC) AS rank
167
- FROM ${this.tableName}, plainto_tsquery('simple', ?) query
168
- WHERE ${ftsColumn} @@ query
169
- LIMIT 50
170
- ),
171
- combined AS (
172
- SELECT
173
- COALESCE(v.id, f.id) AS id,
174
- COALESCE(1.0 / (60 + v.rank), 0) AS vector_score,
175
- COALESCE(1.0 / (60 + f.rank), 0) AS fts_score
176
- FROM vector_search v
177
- FULL OUTER JOIN fts_search f ON v.id = f.id
178
- )
179
- SELECT
180
- t.*,
181
- c.vector_score,
182
- c.fts_score,
183
- (c.vector_score * ? + c.fts_score * ?) AS similarity
184
- FROM combined c
185
- JOIN ${this.tableName} t ON c.id = t.id
186
- ORDER BY similarity DESC
187
- LIMIT ?
188
- `;
189
-
190
- const { rows } = await this.db.raw(sql, [
191
- vectorStr,
192
- vectorStr,
193
- query,
194
- vectorWeight,
195
- ftsWeight,
196
- limit,
197
- ]);
198
-
199
- return rows.map(
200
- (
201
- row: T & {
202
- similarity: number;
203
- vector_score: number;
204
- fts_score: number;
205
- },
206
- ) => ({
207
- id: (row as unknown as { id: number }).id,
208
- similarity: parseFloat(String(row.similarity)),
209
- vectorScore: parseFloat(String(row.vector_score)),
210
- ftsScore: parseFloat(String(row.fts_score)),
211
- data: row as T,
212
- }),
213
- );
214
- }
215
-
216
- /**
217
- * 임베딩 현황 조회
218
- */
219
- async getEmbeddingStatus(embeddingColumn: string = "content_embedding"): Promise<{
220
- total: number;
221
- withEmbedding: number;
222
- withoutEmbedding: number;
223
- }> {
224
- const result = await this.db(this.tableName)
225
- .count("* as total")
226
- .count(`${embeddingColumn} as with_embedding`)
227
- .first();
228
-
229
- const total = parseInt(String(result?.total ?? 0), 10);
230
- const withEmbedding = parseInt(String(result?.with_embedding ?? 0), 10);
231
-
232
- return {
233
- total,
234
- withEmbedding,
235
- withoutEmbedding: total - withEmbedding,
236
- };
237
- }
238
-
239
- /**
240
- * 임베딩이 없는 항목 ID 조회
241
- */
242
- async getItemsWithoutEmbedding(
243
- embeddingColumn: string = "content_embedding",
244
- limit: number = 100,
245
- ): Promise<number[]> {
246
- const rows = await this.db(this.tableName)
247
- .select("id")
248
- .whereNull(embeddingColumn)
249
- .orderBy("id")
250
- .limit(limit);
251
-
252
- return rows.map((row: { id: number }) => row.id);
253
- }
254
-
255
- /**
256
- * Embedding 인스턴스 반환 (고급 사용)
257
- */
258
- getEmbedding(): Embedding {
259
- return this.embedding;
260
- }
261
- }