@nextsparkjs/plugin-ai 0.1.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/.env.example +79 -0
  2. package/README.md +529 -0
  3. package/api/README.md +65 -0
  4. package/api/ai-history/[id]/route.ts +112 -0
  5. package/api/embeddings/route.ts +129 -0
  6. package/api/generate/route.ts +160 -0
  7. package/docs/01-getting-started/01-introduction.md +237 -0
  8. package/docs/01-getting-started/02-installation.md +447 -0
  9. package/docs/01-getting-started/03-configuration.md +416 -0
  10. package/docs/02-features/01-text-generation.md +523 -0
  11. package/docs/02-features/02-embeddings.md +241 -0
  12. package/docs/02-features/03-ai-history.md +549 -0
  13. package/docs/03-advanced-usage/01-core-utilities.md +500 -0
  14. package/docs/04-use-cases/01-content-generation.md +453 -0
  15. package/entities/ai-history/ai-history.config.ts +123 -0
  16. package/entities/ai-history/ai-history.fields.ts +330 -0
  17. package/entities/ai-history/messages/en.json +56 -0
  18. package/entities/ai-history/messages/es.json +56 -0
  19. package/entities/ai-history/migrations/001_ai_history_table.sql +167 -0
  20. package/entities/ai-history/migrations/002_ai_history_metas.sql +103 -0
  21. package/lib/ai-history-meta-service.ts +379 -0
  22. package/lib/ai-history-service.ts +391 -0
  23. package/lib/ai-sdk.ts +7 -0
  24. package/lib/core-utils.ts +217 -0
  25. package/lib/plugin-env.ts +252 -0
  26. package/lib/sanitize.ts +122 -0
  27. package/lib/save-example.ts +237 -0
  28. package/lib/server-env.ts +104 -0
  29. package/package.json +23 -0
  30. package/plugin.config.ts +55 -0
  31. package/public/docs/login-404-error.png +0 -0
  32. package/tsconfig.json +47 -0
  33. package/tsconfig.tsbuildinfo +1 -0
  34. package/types/ai.types.ts +51 -0
@@ -0,0 +1,241 @@
1
+ ---
2
+ title: Text Embeddings
3
+ description: Generate and use text embeddings for semantic search
4
+ ---
5
+
6
+ # Text Embeddings
7
+
8
+ Text embeddings convert text into high-dimensional vectors that capture semantic meaning, enabling powerful search and recommendation features.
9
+
10
+ ## What are Embeddings?
11
+
12
+ Embeddings are numerical representations of text where similar concepts are close together in vector space. This enables:
13
+
14
+ - **Semantic Search**: Find content by meaning, not just keywords
15
+ - **Recommendations**: Suggest similar content based on semantic similarity
16
+ - **Clustering**: Group related documents automatically
17
+ - **Anomaly Detection**: Identify unusual or out-of-place content
18
+
19
+ ## Generating Embeddings
20
+
21
+ ### Basic Example
22
+
23
+ ```typescript
24
+ import { openai } from '@/plugins/ai/lib/openai'
25
+
26
+ async function generateEmbedding(text: string) {
27
+ const response = await openai.embeddings.create({
28
+ model: 'text-embedding-3-small',
29
+ input: text
30
+ })
31
+
32
+ return response.data[0].embedding
33
+ }
34
+
35
+ // Usage
36
+ const embedding = await generateEmbedding(
37
+ 'This is a sample text to embed'
38
+ )
39
+
40
+ console.log(embedding) // [0.123, -0.456, 0.789, ...]
41
+ ```
42
+
43
+ ### Batch Processing
44
+
45
+ Process multiple texts efficiently:
46
+
47
+ ```typescript
48
+ async function batchEmbeddings(texts: string[]) {
49
+ const response = await openai.embeddings.create({
50
+ model: 'text-embedding-3-small',
51
+ input: texts
52
+ })
53
+
54
+ return response.data.map(d => d.embedding)
55
+ }
56
+
57
+ // Generate embeddings for multiple documents
58
+ const embeddings = await batchEmbeddings([
59
+ 'First document text',
60
+ 'Second document text',
61
+ 'Third document text'
62
+ ])
63
+ ```
64
+
65
+ ## Semantic Search
66
+
67
+ Implement semantic search with vector similarity:
68
+
69
+ ```typescript
70
+ import { cosineSimilarity } from '@/plugins/ai/lib/utils'
71
+
72
+ async function semanticSearch(query: string, documents: Document[]) {
73
+ // Generate query embedding
74
+ const queryEmbedding = await generateEmbedding(query)
75
+
76
+ // Calculate similarity scores
77
+ const results = documents.map(doc => ({
78
+ document: doc,
79
+ similarity: cosineSimilarity(queryEmbedding, doc.embedding)
80
+ }))
81
+
82
+ // Sort by similarity (highest first)
83
+ results.sort((a, b) => b.similarity - a.similarity)
84
+
85
+ return results.slice(0, 10) // Top 10 results
86
+ }
87
+
88
+ // Usage
89
+ const results = await semanticSearch(
90
+ 'How do I configure authentication?',
91
+ allDocuments
92
+ )
93
+ ```
94
+
95
+ ## Vector Database Integration
96
+
97
+ Store and query embeddings efficiently using a vector database:
98
+
99
+ ### Pinecone Example
100
+
101
+ ```typescript
102
+ import { Pinecone } from '@pinecone-database/pinecone'
103
+
104
+ const pinecone = new Pinecone({
105
+ apiKey: process.env.PINECONE_API_KEY!
106
+ })
107
+
108
+ const index = pinecone.index('documentation')
109
+
110
+ // Upsert embeddings
111
+ await index.upsert([
112
+ {
113
+ id: 'doc-1',
114
+ values: embedding,
115
+ metadata: {
116
+ title: 'Getting Started',
117
+ content: 'How to get started...',
118
+ url: '/docs/getting-started'
119
+ }
120
+ }
121
+ ])
122
+
123
+ // Query similar vectors
124
+ const queryResults = await index.query({
125
+ vector: queryEmbedding,
126
+ topK: 10,
127
+ includeMetadata: true
128
+ })
129
+ ```
130
+
131
+ ### Supabase pgvector Example
132
+
133
+ ```typescript
134
+ import { createClient } from '@supabase/supabase-js'
135
+
136
+ const supabase = createClient(
137
+ process.env.SUPABASE_URL!,
138
+ process.env.SUPABASE_KEY!
139
+ )
140
+
141
+ // Store embedding
142
+ await supabase.from('documents').insert({
143
+ content: 'Document content',
144
+ embedding: embedding
145
+ })
146
+
147
+ // Semantic search with pgvector
148
+ const { data } = await supabase.rpc('match_documents', {
149
+ query_embedding: queryEmbedding,
150
+ match_threshold: 0.7,
151
+ match_count: 10
152
+ })
153
+ ```
154
+
155
+ ## Use Cases
156
+
157
+ ### 1. Documentation Search
158
+
159
+ ```typescript
160
+ // Build searchable documentation
161
+ const docs = await getAllDocs()
162
+
163
+ for (const doc of docs) {
164
+ const embedding = await generateEmbedding(doc.content)
165
+ await saveEmbedding(doc.id, embedding)
166
+ }
167
+
168
+ // Search
169
+ const results = await semanticSearch('How to deploy?', docs)
170
+ ```
171
+
172
+ ### 2. Content Recommendations
173
+
174
+ ```typescript
175
+ // Recommend similar articles
176
+ async function recommendSimilar(articleId: string, limit = 5) {
177
+ const article = await getArticle(articleId)
178
+ const similar = await semanticSearch(article.content, allArticles)
179
+
180
+ return similar
181
+ .filter(r => r.document.id !== articleId)
182
+ .slice(0, limit)
183
+ }
184
+ ```
185
+
186
+ ### 3. Duplicate Detection
187
+
188
+ ```typescript
189
+ // Find duplicate or very similar content
190
+ async function findDuplicates(threshold = 0.95) {
191
+ const duplicates = []
192
+
193
+ for (let i = 0; i < documents.length; i++) {
194
+ for (let j = i + 1; j < documents.length; j++) {
195
+ const similarity = cosineSimilarity(
196
+ documents[i].embedding,
197
+ documents[j].embedding
198
+ )
199
+
200
+ if (similarity > threshold) {
201
+ duplicates.push([documents[i], documents[j], similarity])
202
+ }
203
+ }
204
+ }
205
+
206
+ return duplicates
207
+ }
208
+ ```
209
+
210
+ ## Best Practices
211
+
212
+ 1. **Chunk long documents** into smaller pieces (max 8191 tokens per input for OpenAI embedding models)
213
+ 2. **Cache embeddings** - they don't change for the same text and model (re-embed if you switch models)
214
+ 3. **Use batch processing** for multiple texts to reduce API calls
215
+ 4. **Choose the right model**:
216
+ - `text-embedding-3-small`: Faster, cheaper, good for most cases
217
+ - `text-embedding-3-large`: Higher accuracy, more expensive
218
+ 5. **Store embeddings efficiently** using a vector database
219
+ 6. **Update embeddings** when content changes
220
+
221
+ ## Performance Tips
222
+
223
+ ```typescript
224
+ // Process in parallel with rate limiting
225
+ import pLimit from 'p-limit'
226
+
227
+ const limit = pLimit(5) // Max 5 concurrent requests
228
+
229
+ const embeddings = await Promise.all(
230
+ documents.map(doc =>
231
+ limit(() => generateEmbedding(doc.content))
232
+ )
233
+ )
234
+ ```
235
+
236
+ ## Cost Optimization
237
+
238
+ - `text-embedding-3-small`: ~$0.02 per 1M tokens
239
+ - `text-embedding-3-large`: ~$0.13 per 1M tokens
240
+ - Cache embeddings to avoid regenerating them for unchanged text
241
+ - Use smaller models when accuracy difference is minimal