@harperfast/skills 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,152 @@
1
+ ---
2
+ name: vector-indexing
3
+ description: How to enable and query vector indexes for similarity search in Harper.
4
+ ---
5
+
6
+ # Vector Indexing
7
+
8
+ Instructions for the agent to follow when implementing vector search in Harper.
9
+
10
+ ## When to Use
11
+
12
+ Use this skill when you need to perform similarity searches on high-dimensional data, such as AI embeddings for semantic search, recommendations, or image retrieval.
13
+
14
+ ## Steps
15
+
16
+ 1. **Enable Vector Indexing**: In your GraphQL schema, add `@indexed(type: "HNSW")` to a numeric array field:
17
+ ```graphql
18
+ type Product @table {
19
+ id: ID @primaryKey
20
+ textEmbeddings: [Float] @indexed(type: "HNSW")
21
+ }
22
+ ```
23
+ 2. **Configure Index Options (Optional)**: Fine-tune the index with parameters like `distance` (`cosine` or `euclidean`), `M`, and `efConstruction`.
24
+ 3. **Query with Vector Search**: Use `tables.Table.search()` with a `sort` object containing the `target` vector:
25
+ ```javascript
26
+ const results = await tables.Product.search({
27
+ select: ['name', '$distance'],
28
+ sort: {
29
+ attribute: 'textEmbeddings',
30
+ target: [0.1, 0.2, ...], // query vector
31
+ },
32
+ limit: 5,
33
+ });
34
+ ```
35
+ 4. **Filter by Distance**: Use `conditions` with a `target` vector and a `comparator` (e.g., `lt`) to return results within a similarity threshold:
36
+ ```javascript
37
+ const results = await tables.Product.search({
38
+ conditions: {
39
+ attribute: 'textEmbeddings',
40
+ comparator: 'lt',
41
+ value: 0.1,
42
+ target: searchVector,
43
+ },
44
+ });
45
+ ```
46
+ 5. **Generate Embeddings**: Use external services (OpenAI, Ollama) to generate the numeric vectors before storing or searching them in Harper.
47
+ ````javascript
48
+ const { Product } = tables;
49
+
50
+ import OpenAI from 'openai';
51
+ const openai = new OpenAI();
52
+ // the name of the OpenAI embedding model
53
+ const OPENAI_EMBEDDING_MODEL = 'text-embedding-3-small';
54
+
55
+ const SIMILARITY_THRESHOLD = 0.5;
56
+
57
+ export class ProductSearch extends Resource {
58
+ // based on env variable we choose the appropriate embedding generator
59
+ generateEmbedding = process.env.EMBEDDING_GENERATOR === 'ollama'
60
+ ? this._generateOllamaEmbedding
61
+ : this._generateOpenAIEmbedding;
62
+
63
+ /**
64
+ * Executes a search query using a generated text embedding and returns the matching products.
65
+ *
66
+ * @param {Object} data - The input data for the request.
67
+ * @param {string} data.prompt - The prompt to generate the text embedding from.
68
+ * @return {Promise<Array>} Returns a promise that resolves to an array of products matching the conditions,
69
+ * including fields: name, description, price, and $distance.
70
+ */
71
+ async post(data) {
72
+ const embedding = await this.generateEmbedding(data.prompt);
73
+
74
+ return await Product.search({
75
+ select: ['name', 'description', 'price', '$distance'],
76
+ conditions: {
77
+ attribute: 'textEmbeddings',
78
+ comparator: 'lt',
79
+ value: SIMILARITY_THRESHOLD,
80
+ target: embedding[0],
81
+ },
82
+ limit: 5,
83
+ });
84
+ }
85
+
86
+ /**
87
+ * Generates an embedding using the Ollama API.
88
+ *
89
+ * @param {string} promptData - The input data for which the embedding is to be generated.
90
+ * @return {Promise<number[][]>} A promise that resolves to the generated embedding as an array of numbers.
91
+ */
92
+ async _generateOllamaEmbedding(promptData) {
93
+ const embedding = await ollama.embed({
94
+ model: OLLAMA_EMBEDDING_MODEL,
95
+ input: promptData,
96
+ });
97
+ return embedding?.embeddings;
98
+ }
99
+
100
+ /**
101
+ * Generates OpenAI embeddings based on the given prompt data.
102
+ *
103
+ * @param {string} promptData - The input data used for generating the embedding.
104
+ * @return {Promise<number[][]>} A promise that resolves to an array of embeddings, where each embedding is an array of floats.
105
+ */
106
+ async _generateOpenAIEmbedding(promptData) {
107
+ const embedding = await openai.embeddings.create({
108
+ model: OPENAI_EMBEDDING_MODEL,
109
+ input: promptData,
110
+ encoding_format: 'float',
111
+ });
112
+
113
+ let embeddings = [];
114
+ embedding.data.forEach((embeddingData) => {
115
+ embeddings.push(embeddingData.embedding);
116
+ });
117
+
118
+ return embeddings;
119
+ }
120
+
121
+ }
122
+
123
+ ````
124
+ Sample request to the `ProductSearch` resource which prompts to find "shorts for the gym":
125
+
126
+ ```bash
127
+ curl -X POST "http://localhost:9926/ProductSearch/" \
128
+ -H "accept: application/json" \
129
+ -H "Content-Type: application/json" \
130
+ -H "Authorization: Basic <YOUR_AUTH>" \
131
+ -d '{"prompt": "shorts for the gym"}'
132
+ ````
133
+
134
+ ---
135
+
136
+ ## When to Use Vector Indexing
137
+
138
+ Vector indexing is ideal when:
139
+
140
+ - Storing embedding vectors from ML models
141
+ - Performing semantic or similarity-based search
142
+ - Working with high-dimensional numeric data
143
+ - Exact-match indexes are insufficient
144
+
145
+ ---
146
+
147
+ ## Summary
148
+
149
+ - Vector indexing enables fast similarity search on numeric arrays
150
+ - Defined using `@indexed(type: "HNSW")`
151
+ - Queried by passing a `target` vector in the `sort` (nearest-neighbor ordering) or `conditions` (distance threshold) option of `search()`
152
+ - Tunable for performance and accuracy
package/package.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "name": "@harperfast/skills",
3
+ "version": "1.0.0",
4
+ "description": "Best practices for making awesome Harper apps with your favorite Agent",
5
+ "repository": "github:HarperFast/skills",
6
+ "bugs": {
7
+ "url": "https://github.com/harperfast/skills/issues"
8
+ },
9
+ "homepage": "https://github.com/harperfast",
10
+ "author": {
11
+ "name": "Harper",
12
+ "email": "support@harperdb.io"
13
+ },
14
+ "license": "Apache-2.0",
15
+ "keywords": [],
16
+
17
+ "type": "module",
18
+ "scripts": {
19
+ "validate": "node scripts/validate-skills.mjs"
20
+ },
21
+ "devDependencies": {
22
+ "@commitlint/cli": "^20.4.1",
23
+ "@commitlint/config-conventional": "^20.4.1",
24
+ "@semantic-release/commit-analyzer": "^13.0.1",
25
+ "@semantic-release/git": "^10.0.1",
26
+ "@semantic-release/github": "^12.0.3",
27
+ "@semantic-release/npm": "^13.1.3",
28
+ "@semantic-release/release-notes-generator": "^14.1.0",
29
+ "@types/node": "^25.2.0",
30
+ "conventional-changelog-conventionalcommits": "^9.1.0",
31
+ "dprint": "^0.51.1",
32
+ "gray-matter": "^4.0.3",
33
+ "semantic-release": "^25.0.3"
34
+ }
35
+ }