@awareness-sdk/local 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,303 @@
1
+ /**
2
+ * Config Manager for Awareness Local
3
+ *
4
+ * Handles:
5
+ * - Device ID generation (unique per machine)
6
+ * - .awareness/ directory scaffolding + .gitignore
7
+ * - config.json creation, loading, and updating
8
+ */
9
+
10
+ import fs from 'node:fs';
11
+ import path from 'node:path';
12
+ import os from 'node:os';
13
+ import crypto from 'node:crypto';
14
+
15
+ // ---------------------------------------------------------------------------
16
+ // Constants
17
+ // ---------------------------------------------------------------------------
18
+
19
/** Name of the per-project data directory created under the project root. */
const AWARENESS_DIR = '.awareness';

/** File name of the JSON configuration stored inside AWARENESS_DIR. */
const CONFIG_FILENAME = 'config.json';

/**
 * Subdirectories to create inside .awareness/, listed parent-first.
 * Entries are relative paths using forward slashes.
 */
const SUBDIRS = [
  'memories',
  'knowledge',
  ...['decisions', 'solutions', 'workflows', 'insights'].map(
    (leaf) => `knowledge/${leaf}`
  ),
  'tasks',
  ...['open', 'done'].map((leaf) => `tasks/${leaf}`),
];
34
+
35
/**
 * Files/patterns that must NOT be committed to Git.
 *
 * Written verbatim to .awareness/.gitignore. The SQLite entries cover the
 * database itself plus its side files: the rollback journal and, in WAL
 * mode, both the write-ahead log (-wal) and the shared-memory index (-shm).
 */
const GITIGNORE_CONTENT = `# SQLite index (rebuilt locally on each device)
index.db
index.db-journal
index.db-wal
index.db-shm

# Daemon runtime files
daemon.pid
daemon.log

# Cloud sync credentials (security-sensitive)
config.json
`;
48
+
49
/**
 * Default configuration matching spec section 7.5.
 *
 * Object.freeze() is shallow, so every nested section is frozen as well;
 * otherwise a stray write such as `DEFAULT_CONFIG.cloud.api_key = ...` would
 * silently change the defaults for every later caller. Consumers must work
 * on a deep copy (a JSON round-trip yields a fully writable clone).
 */
const DEFAULT_CONFIG = Object.freeze({
  version: 1,
  daemon: Object.freeze({
    port: 37800,
    auto_start: true,
    log_level: 'info',
  }),
  device: Object.freeze({
    id: '', // filled by generateDeviceId()
    name: '', // filled by hostname
  }),
  agent: Object.freeze({
    default_role: 'builder_agent',
  }),
  extraction: Object.freeze({
    enabled: true,
  }),
  embedding: Object.freeze({
    language: 'english',
    model_id: null,
  }),
  cloud: Object.freeze({
    enabled: false,
    api_base: 'https://awareness.market/api/v1',
    api_key: '',
    memory_id: '',
    auto_sync: true,
    sync_interval_min: 5,
    last_push_at: null,
    last_pull_at: null,
    push_cursor: null,
    pull_cursor: null,
  }),
  git_sync: Object.freeze({
    enabled: true,
    auto_commit: false,
    branch: null,
  }),
});
89
+
90
+ // ---------------------------------------------------------------------------
91
+ // Device ID
92
+ // ---------------------------------------------------------------------------
93
+
94
/**
 * Build a device fingerprint of the form "{platform}-{hostname-slug}-{4-hex}".
 *
 * The hostname slug is capped at 20 characters. The 4-hex suffix is the
 * start of a SHA-256 digest over hostname, home directory, platform and CPU
 * architecture, so the same inputs always produce the same identifier.
 *
 * @returns {string} e.g. "mac-edwins-mbp-a3f2"
 */
export function generateDeviceId() {
  const label = processPlatformLabel();
  const hostSlug = slugify(os.hostname().toLowerCase(), 20);

  // Hash several machine signals; only the first four hex digits are kept.
  const fingerprint = [os.hostname(), os.homedir(), os.platform(), os.arch()].join('|');
  const digest = crypto.createHash('sha256').update(fingerprint).digest('hex');

  return [label, hostSlug, digest.slice(0, 4)].join('-');
}
115
+
116
+ // ---------------------------------------------------------------------------
117
+ // Directory scaffolding
118
+ // ---------------------------------------------------------------------------
119
+
120
/**
 * Scaffold the .awareness/ directory tree under a project and (re)write its
 * .gitignore. Existing directories are left alone, so repeated calls are safe.
 *
 * @param {string} projectDir - Absolute path to the project root
 * @returns {string} Absolute path to the .awareness/ directory
 */
export function ensureLocalDirs(projectDir) {
  const root = path.join(projectDir, AWARENESS_DIR);
  const mkdirOpts = { recursive: true };

  // Root first, then every configured subdirectory.
  fs.mkdirSync(root, mkdirOpts);
  SUBDIRS.forEach((rel) => {
    fs.mkdirSync(path.join(root, rel), mkdirOpts);
  });

  // The .gitignore is overwritten on every call so it always matches the
  // content shipped with this version.
  fs.writeFileSync(path.join(root, '.gitignore'), GITIGNORE_CONTENT, 'utf-8');

  return root;
}
144
+
145
+ // ---------------------------------------------------------------------------
146
+ // Config CRUD
147
+ // ---------------------------------------------------------------------------
148
+
149
/**
 * Resolve the location of config.json inside a project's .awareness/ folder.
 *
 * @param {string} projectDir - Project root the path is built from
 * @returns {string} `<projectDir>/.awareness/config.json`
 */
export function getConfigPath(projectDir) {
  const segments = [projectDir, AWARENESS_DIR, CONFIG_FILENAME];
  return path.join(...segments);
}
158
+
159
/**
 * Write config.json populated with the defaults plus this machine's device
 * id and hostname. An existing config file is never overwritten; in that
 * case the current config is loaded and returned instead.
 *
 * @param {string} projectDir
 * @returns {object} The config object that was written (or already existed)
 */
export function initLocalConfig(projectDir) {
  const target = getConfigPath(projectDir);

  const alreadyPresent = fs.existsSync(target);
  if (alreadyPresent) {
    return loadLocalConfig(projectDir);
  }

  // The .awareness/ tree must exist before the file can be written.
  ensureLocalDirs(projectDir);

  const fresh = deepClone(DEFAULT_CONFIG);
  Object.assign(fresh.device, {
    id: generateDeviceId(),
    name: os.hostname(),
  });

  const serialized = JSON.stringify(fresh, null, 2);
  fs.writeFileSync(target, serialized, 'utf-8');
  return fresh;
}
187
+
188
/**
 * Read and return config.json. Missing keys are filled from DEFAULT_CONFIG
 * so callers always get a complete shape.
 *
 * Nothing is written to disk. When the file is absent, unreadable, not valid
 * JSON, or its top-level value is not a plain object, a default config
 * carrying this machine's device id and hostname is returned instead; every
 * case except a missing file also emits a warning so the problem is visible.
 *
 * @param {string} projectDir
 * @returns {object}
 */
export function loadLocalConfig(projectDir) {
  const configPath = getConfigPath(projectDir);

  // Defaults plus device identity, used whenever no usable file is found.
  const buildFallback = () => {
    const fallback = deepClone(DEFAULT_CONFIG);
    fallback.device.id = generateDeviceId();
    fallback.device.name = os.hostname();
    return fallback;
  };

  if (!fs.existsSync(configPath)) {
    // Return defaults without writing — caller may want to init explicitly
    return buildFallback();
  }

  let saved;
  try {
    saved = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
  } catch (err) {
    // Unreadable or corrupted JSON — fall back to defaults rather than crash
    console.warn(
      `[config] Could not load ${configPath} (${err.message}); using default configuration.`
    );
    return buildFallback();
  }

  // Valid JSON such as `[]`, `42` or `"text"` is not a config object; merging
  // it would yield defaults with an empty device id, so treat it as corrupt.
  if (saved === null || typeof saved !== 'object' || Array.isArray(saved)) {
    console.warn(
      `[config] ${configPath} does not contain a JSON object; using default configuration.`
    );
    return buildFallback();
  }

  // Merge saved over defaults so new keys added in future versions are present
  return deepMerge(deepClone(DEFAULT_CONFIG), saved);
}
219
+
220
/**
 * Persist cloud credentials into config.json once device-auth has finished.
 * Cloud sync is switched on, the API key and memory id are stored, and the
 * API base URL is replaced only when a truthy `apiBase` is supplied.
 *
 * @param {string} projectDir
 * @param {{ apiKey: string, memoryId: string, apiBase?: string }} cloudOpts
 * @returns {object} The updated full config
 */
export function saveCloudConfig(projectDir, { apiKey, memoryId, apiBase }) {
  const updated = loadLocalConfig(projectDir);
  const { cloud } = updated;

  cloud.enabled = true;
  cloud.api_key = apiKey;
  cloud.memory_id = memoryId;
  if (apiBase) cloud.api_base = apiBase;

  // Make sure .awareness/ exists before the file is written.
  ensureLocalDirs(projectDir);
  fs.writeFileSync(
    getConfigPath(projectDir),
    JSON.stringify(updated, null, 2),
    'utf-8'
  );

  return updated;
}
244
+
245
+ // ---------------------------------------------------------------------------
246
+ // Helpers (internal)
247
+ // ---------------------------------------------------------------------------
248
+
249
/**
 * Short human-readable label for the current platform: "mac" for darwin,
 * "win" for win32, and the raw os.platform() value for everything else.
 *
 * @returns {string}
 */
function processPlatformLabel() {
  const platform = os.platform();
  switch (platform) {
    case 'darwin':
      return 'mac';
    case 'win32':
      return 'win';
    default:
      // Includes 'linux', which is already its own label.
      return platform;
  }
}
257
+
258
/**
 * Turn an arbitrary string into a URL/filename-safe slug.
 *
 * The text is lower-cased, runs of whitespace/underscores become a single
 * hyphen, every remaining character outside [a-z0-9-] is removed, consecutive
 * hyphens are collapsed, and the result is cut to maxLen. Leading and
 * trailing hyphens are stripped last, so truncation can never leave a
 * dangling hyphen at the end.
 *
 * @param {string} text - Input to slugify
 * @param {number} [maxLen=50] - Maximum length of the returned slug
 * @returns {string} Slug containing only a-z, 0-9 and single inner hyphens
 */
function slugify(text, maxLen = 50) {
  return text
    .toLowerCase()
    // Separators must be converted BEFORE the strip step; otherwise the
    // underscore is deleted by the character filter and never hyphenated.
    .replace(/[\s_]+/g, '-')
    .replace(/[^a-z0-9-]/g, '')
    .replace(/-+/g, '-')
    .slice(0, maxLen)
    .replace(/^-+|-+$/g, '');
}
272
+
273
/**
 * Deep-copy a plain JSON-compatible value via a serialize/parse round-trip.
 * Anything JSON cannot represent (undefined, functions, ...) is dropped or
 * converted exactly as JSON.stringify would.
 *
 * @param {*} obj - Value to copy
 * @returns {*} Independent copy sharing no references with the input
 */
function deepClone(obj) {
  const serialized = JSON.stringify(obj);
  return JSON.parse(serialized);
}
277
+
278
/**
 * True for non-null, non-array objects — the only values deepMerge descends into.
 *
 * @param {*} value
 * @returns {boolean}
 */
function isMergeableObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Recursively merge `source` into `target` (mutating and returning `target`).
 * - Scalar values in source overwrite target
 * - Objects are merged recursively
 * - Arrays in source overwrite target (no concat)
 *
 * A `__proto__` key in source is ignored, and recursion only happens into
 * properties that `target` itself owns. JSON.parse produces `__proto__` as an
 * ordinary own key, and without these guards merging such data would write
 * into Object.prototype and affect every object in the process.
 *
 * @param {object} target - Object that receives the merged values
 * @param {object} source - Object whose own enumerable keys are applied
 * @returns {object} The same `target` instance
 */
function deepMerge(target, source) {
  for (const key of Object.keys(source)) {
    if (key === '__proto__') continue;

    const srcVal = source[key];
    const targetOwnsKey = Object.prototype.hasOwnProperty.call(target, key);

    if (
      targetOwnsKey &&
      isMergeableObject(srcVal) &&
      isMergeableObject(target[key])
    ) {
      target[key] = deepMerge(target[key], srcVal);
    } else {
      target[key] = srcVal;
    }
  }
  return target;
}
@@ -0,0 +1,239 @@
1
+ /**
2
+ * Embedder — local embedding module for Awareness Local.
3
+ *
4
+ * Uses @huggingface/transformers (ONNX WASM) for purely-in-JS inference.
5
+ * Falls back gracefully to FTS5-only mode when the dependency is missing.
6
+ *
7
+ * Two model options (user-facing names hide actual model identifiers):
8
+ * "english" → Xenova/all-MiniLM-L6-v2 (23 MB, English only)
9
+ * "multilingual" → Xenova/multilingual-e5-small (118 MB, 100+ languages)
10
+ *
11
+ * Both produce 384-dimensional Float32Array vectors.
12
+ */
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Model map
16
+ // ---------------------------------------------------------------------------
17
+
18
/** User-facing language option → Hugging Face model identifier. */
export const MODEL_MAP = {
  english: 'Xenova/all-MiniLM-L6-v2',
  multilingual: 'Xenova/multilingual-e5-small',
};

/**
 * Model identifiers whose inputs must carry a "query: " / "passage: " prefix.
 * Only the e5 model from MODEL_MAP is registered at the moment.
 */
const E5_MODELS = new Set().add(MODEL_MAP.multilingual);
28
+
29
+ // ---------------------------------------------------------------------------
30
+ // Pipeline cache (one per language/model)
31
+ // ---------------------------------------------------------------------------
32
+
33
/**
 * Pipeline load promises, keyed by model identifier.
 * @type {Map<string, Promise<any>>}
 */
const _pipelineCache = new Map();

/**
 * Whether the HF transformers library could be imported.
 * null = not checked yet, true/false after the first probe.
 */
let _hfAvailable = null;

/**
 * Dynamically import @huggingface/transformers.
 *
 * A failed import is remembered, so later calls return null immediately and
 * the warning is not repeated by them. The warning includes the underlying
 * error message because the import can fail for reasons other than the
 * package being absent (for example a broken install), and reporting all of
 * those as "not installed" hides the real cause.
 *
 * @returns {Promise<object|null>} The module namespace, or null if it cannot be loaded.
 * @private
 */
async function _loadHfModule() {
  if (_hfAvailable === false) return null;
  try {
    const mod = await import('@huggingface/transformers');
    _hfAvailable = true;
    return mod;
  } catch (err) {
    _hfAvailable = false;
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(
      `[embedder] @huggingface/transformers could not be loaded (${reason}). ` +
        'Embedding-based semantic search is disabled; falling back to FTS5-only mode. ' +
        'If it is not installed, install it with: npm install @huggingface/transformers'
    );
    return null;
  }
}
60
+
61
+ // ---------------------------------------------------------------------------
62
+ // Public API
63
+ // ---------------------------------------------------------------------------
64
+
65
/**
 * Lazy-load (and cache) the embedding pipeline for the given language.
 *
 * Unknown languages fall back to the English model. The lookup only honours
 * MODEL_MAP's own keys: a plain `MODEL_MAP[language]` would also resolve
 * inherited names such as "constructor" or "__proto__" to non-string values,
 * which would then be used as the cache key and passed to the pipeline as a
 * model identifier.
 *
 * @param {string} [language='english'] — 'english' | 'multilingual'
 * @returns {Promise<Function|null>} — the HF pipeline function, or null if unavailable.
 */
export async function getEmbedder(language = 'english') {
  const isKnownLanguage = Object.prototype.hasOwnProperty.call(MODEL_MAP, language);
  const modelId = (isKnownLanguage && MODEL_MAP[language]) || MODEL_MAP.english;

  if (_pipelineCache.has(modelId)) {
    return _pipelineCache.get(modelId);
  }

  // Store the promise itself so concurrent callers share the same load.
  const loadPromise = (async () => {
    const hf = await _loadHfModule();
    if (!hf) return null;

    const pipe = await hf.pipeline('feature-extraction', modelId, {
      dtype: 'q8', // INT8 quantised
    });
    return pipe;
  })();

  _pipelineCache.set(modelId, loadPromise);

  // If the load fails, evict the cache entry so the next call can retry.
  // Only this load's own entry is removed, never a newer one.
  loadPromise.catch(() => {
    if (_pipelineCache.get(modelId) === loadPromise) {
      _pipelineCache.delete(modelId);
    }
  });

  return loadPromise;
}
98
+
99
/**
 * Report whether the HF transformers library can be used for embedding.
 * The first call probes by attempting the import; later calls reuse the
 * recorded outcome.
 *
 * @returns {Promise<boolean>}
 */
export async function isEmbeddingAvailable() {
  if (_hfAvailable === null) {
    // Not probed yet — a non-null module means the import worked.
    const loaded = await _loadHfModule();
    return loaded !== null;
  }
  return _hfAvailable;
}
109
+
110
/**
 * Compute the embedding of one text with the model for `language`.
 * The pipeline is invoked with mean pooling and normalisation enabled.
 *
 * @param {string} text
 * @param {string} [type='passage'] — 'query' | 'passage'; prepended as "<type>: " for e5 models only.
 * @param {string} [language='english'] — 'english' | 'multilingual'.
 * @returns {Promise<Float32Array>} — copy of the pipeline's output data.
 * @throws {Error} if embedding is unavailable.
 */
export async function embed(text, type = 'passage', language = 'english') {
  const extractor = await getEmbedder(language);
  if (!extractor) {
    throw new Error(
      'Embedding unavailable: @huggingface/transformers is not installed.'
    );
  }

  const resolvedModel = MODEL_MAP[language] || MODEL_MAP.english;
  const needsPrefix = E5_MODELS.has(resolvedModel);
  const prepared = needsPrefix ? `${type}: ${text}` : text;

  const tensor = await extractor(prepared, { pooling: 'mean', normalize: true });
  return new Float32Array(tensor.data);
}
133
+
134
/**
 * Embed several texts with one pipeline invocation.
 *
 * The result is unpacked in one of three ways, tried in order:
 *   1. `output.data` is a flat buffer of exactly texts.length × 384 values —
 *      it is sliced into one vector per text;
 *   2. the output exposes `tolist()` — each returned row becomes a vector;
 *   3. otherwise every text is embedded individually via embed().
 *
 * @param {string[]} texts
 * @param {string} [type='passage'] — prefix label used for e5 models.
 * @param {string} [language='english']
 * @returns {Promise<Float32Array[]>} — empty array when `texts` is empty or missing.
 * @throws {Error} if embedding is unavailable.
 */
export async function embedBatch(texts, type = 'passage', language = 'english') {
  if (!texts || texts.length === 0) return [];

  const extractor = await getEmbedder(language);
  if (!extractor) {
    throw new Error(
      'Embedding unavailable: @huggingface/transformers is not installed.'
    );
  }

  const resolvedModel = MODEL_MAP[language] || MODEL_MAP.english;
  const prepared = E5_MODELS.has(resolvedModel)
    ? texts.map((t) => `${type}: ${t}`)
    : texts;

  const tensor = await extractor(prepared, { pooling: 'mean', normalize: true });

  const dim = 384;
  const count = texts.length;

  // Case 1: flat buffer — cut it into per-text vectors.
  if (tensor.data && tensor.data.length === count * dim) {
    return Array.from(
      { length: count },
      (_, i) => new Float32Array(tensor.data.slice(i * dim, (i + 1) * dim))
    );
  }

  // Case 2: nested rows from tolist().
  if (typeof tensor.tolist === 'function') {
    return Array.from(tensor.tolist(), (row) => new Float32Array(row));
  }

  // Case 3: fall back to embedding one text at a time, in order.
  const vectors = [];
  for (const text of texts) {
    vectors.push(await embed(text, type, language));
  }
  return vectors;
}
183
+
184
+ // ---------------------------------------------------------------------------
185
+ // Vector utilities
186
+ // ---------------------------------------------------------------------------
187
+
188
/**
 * Cosine of the angle between two equal-length vectors.
 * Returns 0 when either vector has zero magnitude (including empty vectors).
 *
 * @param {Float32Array} a
 * @param {Float32Array} b
 * @returns {number} — value in [-1, 1].
 * @throws {Error} when the vectors differ in length.
 */
export function cosineSimilarity(a, b) {
  const size = a.length;
  if (size !== b.length) {
    throw new Error(
      `Vector dimension mismatch: ${a.length} vs ${b.length}`
    );
  }

  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  for (let i = 0; i < size; i++) {
    const x = a[i];
    const y = b[i];
    dot += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
  }

  const denom = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  return denom === 0 ? 0 : dot / denom;
}
216
+
217
/**
 * Wrap a Float32Array's bytes in a Buffer suitable for SQLite BLOB storage.
 * The Buffer covers exactly the vector's byte range of its underlying
 * ArrayBuffer and shares that memory rather than copying it.
 *
 * @param {Float32Array} vector
 * @returns {Buffer}
 */
export function vectorToBuffer(vector) {
  const { buffer, byteOffset, byteLength } = vector;
  return Buffer.from(buffer, byteOffset, byteLength);
}
226
+
227
/**
 * Convert a Buffer (from SQLite BLOB) back to a Float32Array.
 *
 * A Float32Array view can only start at a byte offset that is a multiple of
 * 4. Node Buffers are frequently slices of a larger ArrayBuffer and may start
 * at any offset, in which case constructing the view directly throws a
 * RangeError. Aligned buffers are therefore viewed in place (no copy), while
 * unaligned ones are first copied into a fresh, aligned allocation.
 *
 * Trailing bytes that do not form a complete 4-byte float are ignored.
 *
 * @param {Buffer} buffer
 * @returns {Float32Array}
 */
export function bufferToVector(buffer) {
  const bytesPerElement = Float32Array.BYTES_PER_ELEMENT;
  const length = Math.floor(buffer.byteLength / bytesPerElement);

  if (buffer.byteOffset % bytesPerElement === 0) {
    return new Float32Array(buffer.buffer, buffer.byteOffset, length);
  }

  // Unaligned start: copy the payload so the view begins at offset 0.
  const aligned = new Uint8Array(length * bytesPerElement);
  aligned.set(buffer.subarray(0, aligned.byteLength));
  return new Float32Array(aligned.buffer);
}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Core module barrel export for Awareness Local
3
+ *
4
+ * Re-exports all core modules so consumers can do:
5
+ * import { MemoryStore, Indexer, SearchEngine } from './core/index.mjs';
6
+ */
7
+
8
+ export {
9
+ ensureLocalDirs,
10
+ initLocalConfig,
11
+ loadLocalConfig,
12
+ saveCloudConfig,
13
+ getConfigPath,
14
+ generateDeviceId,
15
+ } from './config.mjs';
16
+
17
+ export { MemoryStore } from './memory-store.mjs';
18
+
19
+ export { Indexer } from './indexer.mjs';
20
+
21
// Embedder API. MODEL_MAP and isEmbeddingAvailable are exported by
// embedder.mjs and are re-exported here as well, so consumers of the barrel
// can check availability before calling embed()/embedBatch().
export {
  MODEL_MAP,
  getEmbedder,
  isEmbeddingAvailable,
  embed,
  embedBatch,
  cosineSimilarity,
  vectorToBuffer,
  bufferToVector,
} from './embedder.mjs';
29
+
30
+ export { SearchEngine } from './search.mjs';
31
+
32
+ export { KnowledgeExtractor } from './knowledge-extractor.mjs';
33
+
34
+ export { CloudSync } from './cloud-sync.mjs';