@hasna/search 0.0.8 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +2 -1
- package/README.md +78 -9
- package/dist/cli/index.js +1761 -198
- package/dist/cli/local.d.ts +3 -0
- package/dist/cli/local.d.ts.map +1 -0
- package/dist/cli/storage.d.ts +3 -0
- package/dist/cli/storage.d.ts.map +1 -0
- package/dist/db/database.d.ts.map +1 -1
- package/dist/db/index-db.d.ts +6 -0
- package/dist/db/index-db.d.ts.map +1 -0
- package/dist/db/index-migrations.d.ts +3 -0
- package/dist/db/index-migrations.d.ts.map +1 -0
- package/dist/db/migrations.d.ts.map +1 -1
- package/dist/db/pg-migrations.d.ts +1 -1
- package/dist/db/providers.d.ts.map +1 -1
- package/dist/db/storage-config.d.ts +26 -0
- package/dist/db/storage-config.d.ts.map +1 -0
- package/dist/db/storage-sync.d.ts +35 -0
- package/dist/db/storage-sync.d.ts.map +1 -0
- package/dist/index.d.ts +9 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2459 -118
- package/dist/lib/config.d.ts.map +1 -1
- package/dist/lib/dedup.d.ts.map +1 -1
- package/dist/lib/local/find.d.ts +38 -0
- package/dist/lib/local/find.d.ts.map +1 -0
- package/dist/lib/local/ignore.d.ts +38 -0
- package/dist/lib/local/ignore.d.ts.map +1 -0
- package/dist/lib/local/indexer.d.ts +62 -0
- package/dist/lib/local/indexer.d.ts.map +1 -0
- package/dist/lib/local/query.d.ts +60 -0
- package/dist/lib/local/query.d.ts.map +1 -0
- package/dist/lib/local/regex.d.ts +26 -0
- package/dist/lib/local/regex.d.ts.map +1 -0
- package/dist/lib/local/walker.d.ts +30 -0
- package/dist/lib/local/walker.d.ts.map +1 -0
- package/dist/lib/providers/content.d.ts +9 -0
- package/dist/lib/providers/content.d.ts.map +1 -0
- package/dist/lib/providers/files.d.ts +9 -0
- package/dist/lib/providers/files.d.ts.map +1 -0
- package/dist/lib/providers/index.d.ts.map +1 -1
- package/dist/lib/search.d.ts.map +1 -1
- package/dist/mcp/http.d.ts +15 -0
- package/dist/mcp/http.d.ts.map +1 -0
- package/dist/mcp/index.js +14334 -11630
- package/dist/mcp/server.d.ts +5 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/storage-tools.d.ts +3 -0
- package/dist/mcp/storage-tools.d.ts.map +1 -0
- package/dist/server/index.js +28445 -4917
- package/dist/server/serve.d.ts.map +1 -1
- package/dist/storage.d.ts +7 -0
- package/dist/storage.d.ts.map +1 -0
- package/dist/storage.js +5584 -0
- package/dist/types/index.d.ts +10 -2
- package/dist/types/index.d.ts.map +1 -1
- package/package.json +16 -4
- package/dist/cli/cloud.d.ts +0 -3
- package/dist/cli/cloud.d.ts.map +0 -1
- package/dist/db/cloud-config.d.ts +0 -14
- package/dist/db/cloud-config.d.ts.map +0 -1
- package/dist/db/cloud-sync.d.ts +0 -30
- package/dist/db/cloud-sync.d.ts.map +0 -1
- package/dist/mcp/cloud-tools.d.ts +0 -3
- package/dist/mcp/cloud-tools.d.ts.map +0 -1
package/dist/index.js
CHANGED
|
@@ -8901,9 +8901,15 @@ var PROVIDER_NAMES = [
|
|
|
8901
8901
|
"youtube",
|
|
8902
8902
|
"hackernews",
|
|
8903
8903
|
"github",
|
|
8904
|
-
"arxiv"
|
|
8904
|
+
"arxiv",
|
|
8905
|
+
"files",
|
|
8906
|
+
"content"
|
|
8905
8907
|
];
|
|
8906
8908
|
var SearchProviderNameSchema = exports_external.enum(PROVIDER_NAMES);
|
|
8909
|
+
// Provider names registered as "local" search providers. Both entries are
// also members of PROVIDER_NAMES above.
var LOCAL_PROVIDER_NAMES = new Set([
  "files",
  "content"
]);
|
|
8907
8913
|
var EXPORT_FORMATS = ["json", "csv", "md"];
|
|
8908
8914
|
var ExportFormatSchema = exports_external.enum(EXPORT_FORMATS);
|
|
8909
8915
|
var SearchOptionsSchema = exports_external.object({
|
|
@@ -8926,7 +8932,10 @@ var DEFAULT_CONFIG = {
|
|
|
8926
8932
|
fallbackCli: "microservice-transcriber"
|
|
8927
8933
|
},
|
|
8928
8934
|
dedup: true,
|
|
8929
|
-
maxConcurrent: 5
|
|
8935
|
+
maxConcurrent: 5,
|
|
8936
|
+
indexStaleMinutes: 5,
|
|
8937
|
+
indexAutoRefresh: true,
|
|
8938
|
+
recordLocalResults: false
|
|
8930
8939
|
};
|
|
8931
8940
|
var counter = 0;
|
|
8932
8941
|
function generateId() {
|
|
@@ -8971,7 +8980,7 @@ class ValidationError extends SearchError {
|
|
|
8971
8980
|
}
|
|
8972
8981
|
// src/db/database.ts
|
|
8973
8982
|
import { Database } from "bun:sqlite";
|
|
8974
|
-
import {
|
|
8983
|
+
import { mkdirSync } from "fs";
|
|
8975
8984
|
|
|
8976
8985
|
// src/db/migrations.ts
|
|
8977
8986
|
var migrations = [
|
|
@@ -9101,6 +9110,34 @@ var migrations = [
|
|
|
9101
9110
|
('prof-all', 'all', 'All enabled providers', '["google","serpapi","exa","perplexity","brave","bing","twitter","reddit","youtube","hackernews","github","arxiv"]', '{}', datetime('now'));
|
|
9102
9111
|
`);
|
|
9103
9112
|
}
|
|
9113
|
+
},
|
|
9114
|
+
{
|
|
9115
|
+
version: 4,
|
|
9116
|
+
description: "Local search providers (files, content) and local profile",
|
|
9117
|
+
up: (db) => {
|
|
9118
|
+
db.exec(`
|
|
9119
|
+
INSERT OR IGNORE INTO providers (name, enabled, api_key_env, rate_limit, metadata) VALUES
|
|
9120
|
+
('files', 1, '', 0, '{}'),
|
|
9121
|
+
('content', 1, '', 0, '{}');
|
|
9122
|
+
|
|
9123
|
+
INSERT OR IGNORE INTO search_profiles (id, name, description, providers, options, created_at) VALUES
|
|
9124
|
+
('prof-local', 'local', 'Local filesystem: file paths + content', '["files","content"]', '{}', datetime('now'));
|
|
9125
|
+
`);
|
|
9126
|
+
const row = db.query("SELECT id, providers FROM search_profiles WHERE name = 'all'").get();
|
|
9127
|
+
if (row) {
|
|
9128
|
+
let providers;
|
|
9129
|
+
try {
|
|
9130
|
+
providers = JSON.parse(row.providers);
|
|
9131
|
+
} catch {
|
|
9132
|
+
providers = [];
|
|
9133
|
+
}
|
|
9134
|
+
for (const name of ["files", "content"]) {
|
|
9135
|
+
if (!providers.includes(name))
|
|
9136
|
+
providers.push(name);
|
|
9137
|
+
}
|
|
9138
|
+
db.prepare("UPDATE search_profiles SET providers = ? WHERE id = ?").run(JSON.stringify(providers), row.id);
|
|
9139
|
+
}
|
|
9140
|
+
}
|
|
9104
9141
|
}
|
|
9105
9142
|
];
|
|
9106
9143
|
function runMigrations(db) {
|
|
@@ -9118,7 +9155,7 @@ function runMigrations(db) {
|
|
|
9118
9155
|
db.exec("BEGIN");
|
|
9119
9156
|
try {
|
|
9120
9157
|
migration.up(db);
|
|
9121
|
-
db.prepare("INSERT INTO _migrations (version, description) VALUES (?, ?)").run(migration.version, migration.description);
|
|
9158
|
+
db.prepare("INSERT OR IGNORE INTO _migrations (version, description) VALUES (?, ?)").run(migration.version, migration.description);
|
|
9122
9159
|
db.exec("COMMIT");
|
|
9123
9160
|
} catch (err) {
|
|
9124
9161
|
db.exec("ROLLBACK");
|
|
@@ -9129,17 +9166,6 @@ function runMigrations(db) {
|
|
|
9129
9166
|
|
|
9130
9167
|
// src/db/database.ts
|
|
9131
9168
|
var instance = null;
|
|
9132
|
-
function migrateDotfile() {
|
|
9133
|
-
const home = Bun.env.HOME ?? "/tmp";
|
|
9134
|
-
const oldDir = `${home}/.search`;
|
|
9135
|
-
const newDir = `${home}/.hasna/search`;
|
|
9136
|
-
if (existsSync(newDir) || !existsSync(oldDir))
|
|
9137
|
-
return;
|
|
9138
|
-
try {
|
|
9139
|
-
mkdirSync(`${home}/.hasna`, { recursive: true });
|
|
9140
|
-
cpSync(oldDir, newDir, { recursive: true });
|
|
9141
|
-
} catch {}
|
|
9142
|
-
}
|
|
9143
9169
|
function ensureFeedbackTable(db) {
|
|
9144
9170
|
db.exec(`
|
|
9145
9171
|
CREATE TABLE IF NOT EXISTS feedback (
|
|
@@ -9158,18 +9184,19 @@ function getDbPath() {
|
|
|
9158
9184
|
if (envPath)
|
|
9159
9185
|
return envPath;
|
|
9160
9186
|
const home = Bun.env.HOME ?? "/tmp";
|
|
9161
|
-
|
|
9162
|
-
|
|
9163
|
-
|
|
9164
|
-
return `${newDir}/data.db`;
|
|
9187
|
+
const dir = `${home}/.hasna/search`;
|
|
9188
|
+
mkdirSync(dir, { recursive: true });
|
|
9189
|
+
return `${dir}/data.db`;
|
|
9165
9190
|
}
|
|
9166
9191
|
function getDb() {
|
|
9167
9192
|
if (instance)
|
|
9168
9193
|
return instance;
|
|
9169
9194
|
const path = getDbPath();
|
|
9170
9195
|
const db = new Database(path);
|
|
9171
|
-
db.exec("PRAGMA synchronous = NORMAL");
|
|
9172
9196
|
db.exec("PRAGMA busy_timeout = 5000");
|
|
9197
|
+
db.exec("PRAGMA journal_mode = WAL");
|
|
9198
|
+
db.exec("PRAGMA foreign_keys = ON");
|
|
9199
|
+
db.exec("PRAGMA synchronous = NORMAL");
|
|
9173
9200
|
runMigrations(db);
|
|
9174
9201
|
ensureFeedbackTable(db);
|
|
9175
9202
|
instance = db;
|
|
@@ -9190,55 +9217,81 @@ function getDbForTesting() {
|
|
|
9190
9217
|
ensureFeedbackTable(db);
|
|
9191
9218
|
return db;
|
|
9192
9219
|
}
|
|
9193
|
-
// src/db/
|
|
9194
|
-
import { existsSync
|
|
9220
|
+
// src/db/storage-config.ts
|
|
9221
|
+
import { existsSync, readFileSync } from "fs";
|
|
9195
9222
|
import { homedir } from "os";
|
|
9196
9223
|
import { join } from "path";
|
|
9197
|
-
var
|
|
9198
|
-
|
|
9199
|
-
|
|
9200
|
-
|
|
9201
|
-
|
|
9202
|
-
|
|
9203
|
-
|
|
9204
|
-
function
|
|
9224
|
+
var STORAGE_CONFIG_PATH = join(homedir(), ".hasna", "search", "storage", "config.json");
|
|
9225
|
+
var SEARCH_STORAGE_ENV = "HASNA_SEARCH_DATABASE_URL";
|
|
9226
|
+
var SEARCH_STORAGE_FALLBACK_ENV = "SEARCH_DATABASE_URL";
|
|
9227
|
+
var SEARCH_STORAGE_MODE_ENV = "HASNA_SEARCH_STORAGE_MODE";
|
|
9228
|
+
var SEARCH_STORAGE_MODE_FALLBACK_ENV = "SEARCH_STORAGE_MODE";
|
|
9229
|
+
var STORAGE_DATABASE_ENV = [SEARCH_STORAGE_ENV, SEARCH_STORAGE_FALLBACK_ENV];
|
|
9230
|
+
var STORAGE_MODE_ENV = [SEARCH_STORAGE_MODE_ENV, SEARCH_STORAGE_MODE_FALLBACK_ENV];
|
|
9231
|
+
/**
 * Read an environment variable, treating blank values as unset.
 *
 * @param {string} name - Environment variable name.
 * @returns {string | undefined} The trimmed value, or undefined when the
 *   variable is missing, empty, or whitespace-only.
 */
function readEnv(name) {
  const raw = process.env[name];
  if (raw === undefined) {
    return undefined;
  }
  const trimmed = raw.trim();
  return trimmed === "" ? undefined : trimmed;
}
|
|
9235
|
+
/**
 * Normalize a storage-mode value to one of the supported modes.
 *
 * Non-string input (for example a number or object read from a hand-edited
 * config file) yields undefined instead of throwing, so a bad `mode` entry
 * cannot abort processing of the rest of the configuration.
 *
 * @param {unknown} value - Candidate mode; matched case-insensitively after trimming.
 * @returns {"local" | "hybrid" | "remote" | undefined} The canonical mode, or
 *   undefined when the value is not a recognised mode.
 */
function normalizeMode(value) {
  if (typeof value !== "string") {
    return undefined;
  }
  const normalized = value.trim().toLowerCase();
  if (normalized === "local" || normalized === "hybrid" || normalized === "remote") {
    return normalized;
  }
  return undefined;
}
|
|
9241
|
+
/**
 * Find which storage database environment variable is currently in effect.
 *
 * @returns {string | null} The first name in STORAGE_DATABASE_ENV whose value
 *   is non-blank, or null when none is set.
 */
function getStorageDatabaseEnvName() {
  const active = STORAGE_DATABASE_ENV.find((candidate) => readEnv(candidate));
  return active ?? null;
}
|
|
9248
|
+
/**
 * Describe the active storage database environment variable.
 *
 * @returns {{ name: string } | null} An object carrying the variable name, or
 *   null when no storage database variable is set.
 */
function getStorageDatabaseEnv() {
  const activeName = getStorageDatabaseEnvName();
  if (!activeName) {
    return null;
  }
  return { name: activeName };
}
|
|
9252
|
+
/**
 * Resolve the storage database URL from the environment.
 *
 * @returns {string | undefined} The trimmed value of the active storage
 *   database variable, or undefined when none is set.
 */
function getStorageDatabaseUrl() {
  const active = getStorageDatabaseEnv();
  if (!active) {
    return undefined;
  }
  return readEnv(active.name);
}
|
|
9256
|
+
function getStorageConfig() {
|
|
9205
9257
|
const config = {
|
|
9206
9258
|
mode: "local",
|
|
9207
9259
|
rds: {
|
|
9208
9260
|
host: "",
|
|
9209
9261
|
port: 5432,
|
|
9210
9262
|
username: "",
|
|
9211
|
-
password_env: "
|
|
9263
|
+
password_env: "SEARCH_DATABASE_PASSWORD",
|
|
9212
9264
|
ssl: true
|
|
9213
9265
|
}
|
|
9214
9266
|
};
|
|
9215
|
-
if (
|
|
9267
|
+
if (existsSync(STORAGE_CONFIG_PATH)) {
|
|
9216
9268
|
try {
|
|
9217
|
-
const raw = JSON.parse(readFileSync(
|
|
9218
|
-
config.mode = raw.mode ?? config.mode;
|
|
9269
|
+
const raw = JSON.parse(readFileSync(STORAGE_CONFIG_PATH, "utf-8"));
|
|
9270
|
+
config.mode = normalizeMode(raw.mode) ?? config.mode;
|
|
9219
9271
|
config.rds = { ...config.rds, ...raw.rds ?? {} };
|
|
9220
9272
|
} catch {}
|
|
9221
9273
|
}
|
|
9222
|
-
const modeOverride =
|
|
9223
|
-
|
|
9224
|
-
|
|
9225
|
-
|
|
9274
|
+
const modeOverride = readEnv(SEARCH_STORAGE_MODE_ENV) ?? readEnv(SEARCH_STORAGE_MODE_FALLBACK_ENV);
|
|
9275
|
+
const normalizedMode = normalizeMode(modeOverride);
|
|
9276
|
+
if (normalizedMode) {
|
|
9277
|
+
config.mode = normalizedMode;
|
|
9278
|
+
} else if (getStorageDatabaseUrl() && config.mode === "local") {
|
|
9226
9279
|
config.mode = "hybrid";
|
|
9227
9280
|
}
|
|
9228
9281
|
return config;
|
|
9229
9282
|
}
|
|
9230
|
-
function
|
|
9231
|
-
const direct =
|
|
9283
|
+
function getStorageConnectionString(dbName = "search") {
|
|
9284
|
+
const direct = getStorageDatabaseUrl();
|
|
9232
9285
|
if (direct)
|
|
9233
9286
|
return direct;
|
|
9234
|
-
const config =
|
|
9287
|
+
const config = getStorageConfig();
|
|
9235
9288
|
const { host, port, username, password_env, ssl } = config.rds;
|
|
9236
9289
|
if (!host || !username) {
|
|
9237
|
-
throw new Error("
|
|
9290
|
+
throw new Error("Storage database is not configured. Set HASNA_SEARCH_DATABASE_URL or configure ~/.hasna/search/storage/config.json.");
|
|
9238
9291
|
}
|
|
9239
|
-
const password =
|
|
9292
|
+
const password = process.env[password_env];
|
|
9240
9293
|
if (!password) {
|
|
9241
|
-
throw new Error(`
|
|
9294
|
+
throw new Error(`Storage database password is not set. Export ${password_env}.`);
|
|
9242
9295
|
}
|
|
9243
9296
|
const sslParam = ssl ? "?sslmode=require" : "";
|
|
9244
9297
|
return `postgres://${username}:${encodeURIComponent(password)}@${host}:${port}/${dbName}${sslParam}`;
|
|
@@ -9429,8 +9482,8 @@ async function applyPgMigrations(connectionString) {
|
|
|
9429
9482
|
}
|
|
9430
9483
|
return result;
|
|
9431
9484
|
}
|
|
9432
|
-
// src/db/
|
|
9433
|
-
var
|
|
9485
|
+
// src/db/storage-sync.ts
|
|
9486
|
+
var STORAGE_TABLES = [
|
|
9434
9487
|
"searches",
|
|
9435
9488
|
"search_results",
|
|
9436
9489
|
"saved_searches",
|
|
@@ -9517,21 +9570,26 @@ function upsertSqlite(db, table, rows) {
|
|
|
9517
9570
|
}
|
|
9518
9571
|
return written;
|
|
9519
9572
|
}
|
|
9520
|
-
async function
|
|
9521
|
-
return new PgAdapterAsync(
|
|
9573
|
+
/**
 * Create an async Postgres adapter for the "search" storage database.
 *
 * @returns {Promise<PgAdapterAsync>} Adapter built from the storage connection
 *   string; the promise rejects if the connection string cannot be resolved.
 */
async function getStoragePg() {
  const connectionString = getStorageConnectionString("search");
  return new PgAdapterAsync(connectionString);
}
|
|
9523
|
-
async function
|
|
9576
|
+
/**
 * Apply every entry of PG_MIGRATIONS to the remote database, one at a time and
 * in declaration order. Each statement is awaited before the next is sent.
 *
 * @param {{ exec: (sql: string) => Promise<unknown> }} remote - Remote adapter.
 * @returns {Promise<void>}
 */
async function runStorageMigrations(remote) {
  for (const statement of PG_MIGRATIONS) {
    await remote.exec(statement);
  }
}
|
|
9528
|
-
function
|
|
9529
|
-
const config =
|
|
9581
|
+
function getStorageStatus(db = getDb()) {
|
|
9582
|
+
const config = getStorageConfig();
|
|
9583
|
+
const activeEnv = getStorageDatabaseEnv();
|
|
9530
9584
|
return {
|
|
9585
|
+
configured: Boolean(activeEnv) || Boolean(config.rds.host && config.rds.username),
|
|
9531
9586
|
mode: config.mode,
|
|
9532
|
-
enabled: config.mode === "hybrid" || config.mode === "
|
|
9587
|
+
enabled: config.mode === "hybrid" || config.mode === "remote",
|
|
9588
|
+
env: STORAGE_DATABASE_ENV,
|
|
9589
|
+
activeEnv: activeEnv?.name ?? null,
|
|
9590
|
+
service: "search",
|
|
9533
9591
|
db_path: getDbPath(),
|
|
9534
|
-
tables:
|
|
9592
|
+
tables: STORAGE_TABLES.map((table) => {
|
|
9535
9593
|
try {
|
|
9536
9594
|
const row = db.query(`SELECT COUNT(*) as count FROM ${quoteId(table)}`).get();
|
|
9537
9595
|
return { table, rows: row.count };
|
|
@@ -9541,12 +9599,12 @@ function getCloudStatus(db = getDb()) {
|
|
|
9541
9599
|
})
|
|
9542
9600
|
};
|
|
9543
9601
|
}
|
|
9544
|
-
async function
|
|
9602
|
+
async function pushStorageChanges(tables = [...STORAGE_TABLES]) {
|
|
9545
9603
|
const db = getDb();
|
|
9546
|
-
const remote = await
|
|
9604
|
+
const remote = await getStoragePg();
|
|
9547
9605
|
const results = [];
|
|
9548
9606
|
try {
|
|
9549
|
-
await
|
|
9607
|
+
await runStorageMigrations(remote);
|
|
9550
9608
|
for (const table of tables) {
|
|
9551
9609
|
const result = { table, direction: "push", rowsRead: 0, rowsWritten: 0, errors: [] };
|
|
9552
9610
|
try {
|
|
@@ -9563,12 +9621,12 @@ async function pushCloudChanges(tables = [...CLOUD_TABLES]) {
|
|
|
9563
9621
|
}
|
|
9564
9622
|
return results;
|
|
9565
9623
|
}
|
|
9566
|
-
async function
|
|
9624
|
+
async function pullStorageChanges(tables = [...STORAGE_TABLES]) {
|
|
9567
9625
|
const db = getDb();
|
|
9568
|
-
const remote = await
|
|
9626
|
+
const remote = await getStoragePg();
|
|
9569
9627
|
const results = [];
|
|
9570
9628
|
try {
|
|
9571
|
-
await
|
|
9629
|
+
await runStorageMigrations(remote);
|
|
9572
9630
|
for (const table of tables) {
|
|
9573
9631
|
const result = { table, direction: "pull", rowsRead: 0, rowsWritten: 0, errors: [] };
|
|
9574
9632
|
try {
|
|
@@ -9585,17 +9643,21 @@ async function pullCloudChanges(tables = [...CLOUD_TABLES]) {
|
|
|
9585
9643
|
}
|
|
9586
9644
|
return results;
|
|
9587
9645
|
}
|
|
9588
|
-
async function
|
|
9646
|
+
async function syncStorageChanges(tables = [...STORAGE_TABLES]) {
|
|
9589
9647
|
return {
|
|
9590
|
-
push: await
|
|
9591
|
-
pull: await
|
|
9648
|
+
push: await pushStorageChanges(tables),
|
|
9649
|
+
pull: await pullStorageChanges(tables)
|
|
9592
9650
|
};
|
|
9593
9651
|
}
|
|
9594
|
-
function
|
|
9652
|
+
function parseStorageTables(raw) {
|
|
9595
9653
|
if (!raw)
|
|
9596
|
-
return [...
|
|
9654
|
+
return [...STORAGE_TABLES];
|
|
9597
9655
|
const requested = raw.split(",").map((table) => table.trim()).filter(Boolean);
|
|
9598
|
-
|
|
9656
|
+
const allowed = new Set(STORAGE_TABLES);
|
|
9657
|
+
const invalid = requested.filter((table) => !allowed.has(table));
|
|
9658
|
+
if (invalid.length > 0)
|
|
9659
|
+
throw new Error(`Unknown search storage table(s): ${invalid.join(", ")}`);
|
|
9660
|
+
return requested.length > 0 ? requested : [...STORAGE_TABLES];
|
|
9599
9661
|
}
|
|
9600
9662
|
// src/db/searches.ts
|
|
9601
9663
|
function rowToSearch(row) {
|
|
@@ -9814,6 +9876,726 @@ function updateSavedSearchLastRun(id, db) {
|
|
|
9814
9876
|
const now = new Date().toISOString();
|
|
9815
9877
|
d.prepare("UPDATE saved_searches SET last_run_at = ? WHERE id = ?").run(now, id);
|
|
9816
9878
|
}
|
|
9879
|
+
// src/lib/local/indexer.ts
|
|
9880
|
+
import { readFileSync as readFileSync4, statSync as statSync2 } from "fs";
|
|
9881
|
+
import { homedir as homedir2 } from "os";
|
|
9882
|
+
import { basename, resolve } from "path";
|
|
9883
|
+
|
|
9884
|
+
// src/db/index-db.ts
|
|
9885
|
+
import { Database as Database2 } from "bun:sqlite";
|
|
9886
|
+
import { mkdirSync as mkdirSync2 } from "fs";
|
|
9887
|
+
|
|
9888
|
+
// src/db/index-migrations.ts
|
|
9889
|
+
// Schema migrations for the local file index database, applied in order by
// runIndexMigrations(). Each entry has a numeric `version`, a `description`,
// and an `up(db)` callback that executes the DDL.
var migrations2 = [
  {
    // v1 creates:
    //   - index_roots: one row per indexed root directory, with its settings and status
    //   - files: one row per file, unique per (root_id, rel_path), cascading on root delete
    //   - files_fts: trigram FTS5 table over files.name / files.rel_path (external
    //     content table `files`), kept in sync by the files_ai / files_ad / files_au triggers
    //   - file_content_fts: contentless trigram FTS5 table holding file bodies
    version: 1,
    description: "Local file index core",
    up: (db) => {
      db.exec(`
        CREATE TABLE IF NOT EXISTS index_roots (
          id TEXT PRIMARY KEY,
          path TEXT NOT NULL UNIQUE,
          name TEXT NOT NULL,
          exclude TEXT NOT NULL DEFAULT '[]',
          content_indexing INTEGER NOT NULL DEFAULT 1,
          max_file_size INTEGER NOT NULL DEFAULT 524288,
          status TEXT NOT NULL DEFAULT 'pending',
          error TEXT,
          file_count INTEGER NOT NULL DEFAULT 0,
          last_indexed_at TEXT,
          last_duration_ms INTEGER,
          created_at TEXT NOT NULL DEFAULT (datetime('now'))
        );

        CREATE TABLE IF NOT EXISTS files (
          id INTEGER PRIMARY KEY,
          root_id TEXT NOT NULL REFERENCES index_roots(id) ON DELETE CASCADE,
          rel_path TEXT NOT NULL,
          name TEXT NOT NULL,
          ext TEXT NOT NULL DEFAULT '',
          dir TEXT NOT NULL DEFAULT '',
          size INTEGER NOT NULL,
          mtime_ms INTEGER NOT NULL,
          is_binary INTEGER NOT NULL DEFAULT 0,
          content_indexed INTEGER NOT NULL DEFAULT 0,
          indexed_at TEXT NOT NULL,
          UNIQUE(root_id, rel_path)
        );
        CREATE INDEX IF NOT EXISTS idx_files_root ON files(root_id);
        CREATE INDEX IF NOT EXISTS idx_files_name ON files(name);

        CREATE VIRTUAL TABLE IF NOT EXISTS files_fts USING fts5(
          name, rel_path,
          content='files',
          content_rowid='id',
          tokenize='trigram'
        );

        CREATE TRIGGER IF NOT EXISTS files_ai AFTER INSERT ON files BEGIN
          INSERT INTO files_fts(rowid, name, rel_path)
          VALUES (NEW.id, NEW.name, NEW.rel_path);
        END;

        CREATE TRIGGER IF NOT EXISTS files_ad AFTER DELETE ON files BEGIN
          INSERT INTO files_fts(files_fts, rowid, name, rel_path)
          VALUES ('delete', OLD.id, OLD.name, OLD.rel_path);
        END;

        CREATE TRIGGER IF NOT EXISTS files_au AFTER UPDATE OF name, rel_path ON files BEGIN
          INSERT INTO files_fts(files_fts, rowid, name, rel_path)
          VALUES ('delete', OLD.id, OLD.name, OLD.rel_path);
          INSERT INTO files_fts(rowid, name, rel_path)
          VALUES (NEW.id, NEW.name, NEW.rel_path);
        END;

        CREATE VIRTUAL TABLE IF NOT EXISTS file_content_fts USING fts5(
          body,
          content='',
          contentless_delete=1,
          tokenize='trigram'
        );
      `);
    }
  }
];
|
|
9961
|
+
/**
 * Bring the index database schema up to date.
 *
 * Ensures the `_migrations` bookkeeping table exists, then runs every entry of
 * `migrations2` whose version has not been recorded yet. Each migration runs
 * inside its own BEGIN/COMMIT; on failure the transaction is rolled back and
 * the original error is rethrown.
 *
 * @param {object} db - Database handle exposing `exec`, `query` and `prepare`.
 * @returns {void}
 */
function runIndexMigrations(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS _migrations (
      version INTEGER PRIMARY KEY,
      description TEXT NOT NULL,
      applied_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
  `);
  const appliedRows = db.query("SELECT version FROM _migrations").all();
  const appliedVersions = new Set(appliedRows.map((row) => row.version));
  const pending = migrations2.filter((candidate) => !appliedVersions.has(candidate.version));
  for (const step of pending) {
    db.exec("BEGIN");
    try {
      step.up(db);
      const record = db.prepare("INSERT OR IGNORE INTO _migrations (version, description) VALUES (?, ?)");
      record.run(step.version, step.description);
      db.exec("COMMIT");
    } catch (err) {
      db.exec("ROLLBACK");
      throw err;
    }
  }
}
|
|
9984
|
+
|
|
9985
|
+
// src/db/index-db.ts
|
|
9986
|
+
// Cached index database handle: null until getIndexDb() opens it, and reset to
// null again by closeIndexDb().
var instance2 = null;
|
|
9987
|
+
/**
 * Resolve the path of the local index database.
 *
 * Precedence: HASNA_SEARCH_INDEX_DB_PATH, then SEARCH_INDEX_DB_PATH, then
 * `<HOME>/.hasna/search/index.db` (HOME falls back to "/tmp"). An empty primary
 * variable is treated as unset, so it no longer hides a populated fallback
 * variable. The default directory is created if missing; a path supplied via
 * the environment is returned as-is without creating anything.
 *
 * @returns {string} Path to the index database file.
 */
function getIndexDbPath() {
  // `||` rather than `??`: an empty string must fall through to the next source.
  const envPath = Bun.env.HASNA_SEARCH_INDEX_DB_PATH || Bun.env.SEARCH_INDEX_DB_PATH;
  if (envPath) {
    return envPath;
  }
  const home = Bun.env.HOME ?? "/tmp";
  const dir = `${home}/.hasna/search`;
  mkdirSync2(dir, { recursive: true });
  return `${dir}/index.db`;
}
|
|
9996
|
+
/**
 * Apply connection pragmas to a freshly opened index database and run the
 * index schema migrations.
 *
 * @param {object} db - Open database handle.
 * @returns {object} The same handle, for chaining.
 */
function configure(db) {
  // Order is preserved: busy timeout first, then journal mode, foreign keys, sync level.
  const pragmaStatements = [
    "PRAGMA busy_timeout = 5000",
    "PRAGMA journal_mode = WAL",
    "PRAGMA foreign_keys = ON",
    "PRAGMA synchronous = NORMAL",
  ];
  for (const statement of pragmaStatements) {
    db.exec(statement);
  }
  runIndexMigrations(db);
  return db;
}
|
|
10004
|
+
/**
 * Return the shared index database handle, opening and configuring it on first
 * use.
 *
 * @returns {object} The cached, configured database handle.
 */
function getIndexDb() {
  if (!instance2) {
    const opened = new Database2(getIndexDbPath());
    instance2 = configure(opened);
  }
  return instance2;
}
|
|
10010
|
+
/**
 * Close the shared index database handle, if one is open, and clear the cache
 * so the next getIndexDb() call reopens it. Does nothing when no handle is open.
 *
 * @returns {void}
 */
function closeIndexDb() {
  if (!instance2) {
    return;
  }
  instance2.close();
  instance2 = null;
}
|
|
10016
|
+
/**
 * Create a fresh, fully configured in-memory index database. The handle is not
 * cached; every call returns a new database.
 *
 * @returns {object} Configured in-memory database handle.
 */
function getIndexDbForTesting() {
  const memoryDb = new Database2(":memory:");
  return configure(memoryDb);
}
|
|
10019
|
+
|
|
10020
|
+
// src/lib/config.ts
|
|
10021
|
+
import { mkdirSync as mkdirSync3, readFileSync as readFileSync2, writeFileSync, existsSync as existsSync2 } from "fs";
|
|
10022
|
+
/**
 * Return the configuration directory, `<HOME>/.hasna/search`, creating it (and
 * any missing parents) when it does not exist. HOME falls back to "/tmp".
 *
 * @returns {string} Path of the configuration directory.
 */
function getConfigDir() {
  const configDir = `${Bun.env.HOME ?? "/tmp"}/.hasna/search`;
  mkdirSync3(configDir, { recursive: true });
  return configDir;
}
|
|
10028
|
+
/**
 * Return the path of the JSON configuration file inside the configuration
 * directory.
 *
 * @returns {string} `<config dir>/config.json`.
 */
function getConfigPath() {
  const configDir = getConfigDir();
  return `${configDir}/config.json`;
}
|
|
10031
|
+
/**
 * Load the effective configuration.
 *
 * Starts from a shallow copy of DEFAULT_CONFIG and overlays the top-level keys
 * found in the config file. A missing, unreadable, or unparsable file yields
 * the defaults alone.
 *
 * @returns {object} A new configuration object on every call.
 */
function getConfig() {
  const configFile = getConfigPath();
  let overrides = {};
  if (existsSync2(configFile)) {
    try {
      overrides = JSON.parse(readFileSync2(configFile, "utf-8"));
    } catch {
      // Best effort: a broken config file falls back to the defaults.
      overrides = {};
    }
  }
  return { ...DEFAULT_CONFIG, ...overrides };
}
|
|
10044
|
+
/**
 * Merge `updates` over the current configuration (top-level keys only), write
 * the result to the config file as 2-space-indented JSON, and return it.
 *
 * @param {object} updates - Keys to override.
 * @returns {object} The merged configuration that was written.
 */
function setConfig(updates) {
  const merged = { ...getConfig(), ...updates };
  writeFileSync(getConfigPath(), JSON.stringify(merged, null, 2), "utf-8");
  return merged;
}
|
|
10051
|
+
/**
 * Overwrite the config file with DEFAULT_CONFIG (2-space-indented JSON).
 *
 * @returns {object} A shallow copy of DEFAULT_CONFIG.
 */
function resetConfig() {
  const serializedDefaults = JSON.stringify(DEFAULT_CONFIG, null, 2);
  writeFileSync(getConfigPath(), serializedDefaults, "utf-8");
  return { ...DEFAULT_CONFIG };
}
|
|
10056
|
+
/**
 * Look up a single top-level key of the effective configuration.
 *
 * @param {string} key - Configuration key.
 * @returns {*} The value stored under `key`, or undefined when absent.
 */
function getConfigValue(key) {
  return getConfig()[key];
}
|
|
10060
|
+
/**
 * Set one top-level configuration key and persist the result via setConfig().
 *
 * @param {string} key - Configuration key to set.
 * @param {*} value - New value.
 * @returns {object} The merged configuration returned by setConfig().
 */
function setConfigValue(key, value) {
  const next = getConfig();
  next[key] = value;
  return setConfig(next);
}
|
|
10065
|
+
|
|
10066
|
+
// src/lib/local/walker.ts
|
|
10067
|
+
import { readdirSync, readFileSync as readFileSync3, statSync, openSync, readSync, closeSync } from "fs";
|
|
10068
|
+
import { join as join2 } from "path";
|
|
10069
|
+
|
|
10070
|
+
// src/lib/local/ignore.ts
|
|
10071
|
+
/**
 * Locate the `]` that closes a glob character class.
 *
 * A leading negation marker (`!` or `^`) is skipped, and a `]` that appears
 * first in the class (after any negation marker) is a literal member rather
 * than the terminator, so `[]a]`, `[!]a]` and `[^]a]` all close at their final
 * bracket. A backslash escapes the following character.
 *
 * @param {string} pattern - Full glob pattern.
 * @param {number} open - Index of the opening `[`.
 * @returns {number} Index of the closing `]`, or -1 when the class is unterminated.
 */
function classEnd(pattern, open) {
  let i = open + 1;
  // Both `!` and `^` negate a class; neither may be mistaken for a member
  // when deciding whether the next `]` is literal.
  if (pattern[i] === "!" || pattern[i] === "^") {
    i++;
  }
  if (pattern[i] === "]") {
    i++;
  }
  for (; i < pattern.length; i++) {
    if (pattern[i] === "\\") {
      // Skip the escaped character so an escaped `]` does not end the class.
      i++;
      continue;
    }
    if (pattern[i] === "]") {
      return i;
    }
  }
  return -1;
}
|
|
10087
|
+
/**
 * Translate a gitignore-style glob into the source text of a regular
 * expression (no anchors; the caller adds `^`/`$` and any prefix).
 *
 * Translation rules:
 *   - `**` followed by `/` at the start of a segment -> zero or more whole path segments
 *   - `**` at the start of a segment and at the very end of the pattern -> anything
 *   - any other `**`, and a single `*`   -> any run of non-`/` characters
 *   - `?`                                -> exactly one non-`/` character
 *   - `[...]`                            -> a character class (`!` negation becomes `^`)
 *   - backslash followed by a character  -> that character, taken literally
 *   - everything else                    -> itself, with regex metacharacters escaped
 *
 * @param {string} pattern - Glob pattern without leading `!`, leading `/` or trailing `/`.
 * @returns {string} Regular-expression source fragment.
 */
function globToRegExp(pattern) {
  let out = "";
  let i = 0;
  while (i < pattern.length) {
    const ch = pattern[i];
    if (ch === "*") {
      if (pattern[i + 1] === "*") {
        // `**` only has its multi-segment meaning when it starts a path segment.
        const segmentStart = i === 0 || pattern[i - 1] === "/";
        if (segmentStart && pattern[i + 2] === "/") {
          // `**/` consumes the slash too: zero or more complete directories.
          out += "(?:[^/]*/)*";
          i += 3;
        } else if (segmentStart && i + 2 === pattern.length) {
          // Trailing `**` matches everything below this point.
          out += ".*";
          i += 2;
        } else {
          // `**` glued to other characters behaves like a single `*`.
          out += "[^/]*";
          i += 2;
        }
      } else {
        out += "[^/]*";
        i += 1;
      }
    } else if (ch === "?") {
      out += "[^/]";
      i += 1;
    } else if (ch === "[") {
      const close = classEnd(pattern, i);
      if (close === -1) {
        // Unterminated class: treat the `[` as a literal bracket.
        out += "\\[";
        i += 1;
      } else {
        let cls = pattern.slice(i + 1, close);
        if (cls.startsWith("!"))
          cls = "^" + cls.slice(1);
        // Rebuild the class body so escapes and a literal `]` stay valid in a JS regex.
        // NOTE(review): the emitted class can match "/" (e.g. `[!a]`); confirm
        // whether that is intended for path-aware matching.
        let jsCls = "";
        for (let k = 0;k < cls.length; k++) {
          const c = cls[k];
          if (c === "\\" && k + 1 < cls.length) {
            jsCls += "\\" + cls[k + 1];
            k++;
          } else if (c === "]") {
            jsCls += "\\]";
          } else if (c === "\\") {
            // A trailing lone backslash is emitted as a literal backslash.
            jsCls += "\\\\";
          } else {
            jsCls += c;
          }
        }
        out += `[${jsCls}]`;
        i = close + 1;
      }
    } else if (ch === "\\" && i + 1 < pattern.length) {
      // Escaped character: emit it literally, escaping it for the regex if needed.
      out += pattern[i + 1].replace(/[.+^${}()|[\]\\*?]/g, "\\$&");
      i += 2;
    } else {
      out += ch.replace(/[.+^${}()|[\]\\]/g, "\\$&");
      i += 1;
    }
  }
  return out;
}
|
|
10148
|
+
/**
 * Compile one line of a gitignore-style file into a matcher rule.
 *
 * Handling, in order: a trailing carriage return is dropped; blank lines and
 * `#` comments are skipped; a leading `!` marks a negated rule; unescaped
 * trailing spaces are removed; a trailing `/` marks a directory-only rule; a
 * pattern that contains `/` (after the trailing slash is removed) is anchored
 * to the matcher's base, otherwise it may match at any depth.
 *
 * A pattern whose translated form is not a valid regular expression (for
 * example the reversed range `[z-a]`) is skipped instead of throwing, so one
 * malformed line cannot abort construction of the whole matcher.
 *
 * @param {string} raw - One line of the ignore file.
 * @returns {{ regex: RegExp, negated: boolean, dirOnly: boolean } | null}
 *   The compiled rule, or null when the line carries no usable pattern.
 */
function compile(raw) {
  let pattern = raw.replace(/\r$/, "");
  if (!pattern.trim() || pattern.startsWith("#")) {
    return null;
  }
  let negated = false;
  if (pattern.startsWith("!")) {
    negated = true;
    pattern = pattern.slice(1);
  }
  // Trailing spaces are insignificant unless the last one is backslash-escaped.
  pattern = pattern.replace(/(?<!\\) +$/, "");
  if (!pattern) {
    return null;
  }
  let dirOnly = false;
  if (pattern.endsWith("/")) {
    dirOnly = true;
    pattern = pattern.slice(0, -1);
  }
  // Evaluated before the leading slash is stripped: `/foo` is anchored too.
  const anchored = pattern.includes("/");
  if (pattern.startsWith("/")) {
    pattern = pattern.slice(1);
  }
  const body = globToRegExp(pattern);
  const prefix = anchored ? "^" : "^(?:.*/)?";
  let regex;
  try {
    regex = new RegExp(`${prefix}${body}$`);
  } catch {
    // Invalid pattern: ignore this rule rather than failing the scan.
    return null;
  }
  return { regex, negated, dirOnly };
}
|
|
10173
|
+
|
|
10174
|
+
/**
 * A set of compiled ignore rules scoped to one base directory.
 *
 * `base` is the directory (relative to the scan root, "" for the root itself)
 * that the rules are relative to. Lines that compile to null are dropped.
 */
class IgnoreMatcher {
  patterns;
  base;
  /**
   * @param {string[]} lines - Raw ignore-file lines.
   * @param {string} [base=""] - Directory the rules apply to, relative to the scan root.
   */
  constructor(lines, base = "") {
    this.base = base;
    const compiled = [];
    for (const line of lines) {
      const rule = compile(line);
      if (rule !== null) {
        compiled.push(rule);
      }
    }
    this.patterns = compiled;
  }
  /**
   * Decide whether this matcher has an opinion about a path.
   *
   * Rules are evaluated in order and the last matching rule wins; directory-only
   * rules are skipped for non-directories.
   *
   * @param {string} relPath - Path relative to the scan root.
   * @param {boolean} isDir - Whether the path is a directory.
   * @returns {boolean | undefined} true = ignored, false = explicitly re-included,
   *   undefined = no rule matched or the path lies outside `base`.
   */
  ignores(relPath, isDir) {
    const scope = this.base ? `${this.base}/` : "";
    if (scope && !relPath.startsWith(scope)) {
      return undefined;
    }
    const local = relPath.slice(scope.length);
    let verdict;
    for (const rule of this.patterns) {
      if ((!rule.dirOnly || isDir) && rule.regex.test(local)) {
        verdict = !rule.negated;
      }
    }
    return verdict;
  }
}
|
|
10198
|
+
|
|
10199
|
+
/**
 * Layered ignore rules: one optional "hard" matcher that always takes
 * precedence, plus a stack of matchers pushed and popped by the caller.
 */
class IgnoreStack {
  hard;
  stack = [];
  /**
   * @param {{ ignores: Function } | null} [hard=null] - Matcher consulted before the stack.
   */
  constructor(hard = null) {
    this.hard = hard;
  }
  /** Add a matcher on top of the stack. */
  push(matcher) {
    this.stack.push(matcher);
  }
  /** Remove the most recently pushed matcher. */
  pop() {
    this.stack.pop();
  }
  /**
   * Resolve whether a path is ignored.
   *
   * The hard matcher is asked first; if it has no opinion, stacked matchers are
   * asked from the most recently pushed down to the oldest. The first
   * non-undefined answer wins; with no answer at all the path is not ignored.
   *
   * @param {string} relPath - Path relative to the scan root.
   * @param {boolean} isDir - Whether the path is a directory.
   * @returns {boolean} true when the path should be ignored.
   */
  ignores(relPath, isDir) {
    const fromHard = this.hard?.ignores(relPath, isDir);
    if (fromHard !== undefined) {
      return fromHard;
    }
    let depth = this.stack.length;
    while (depth-- > 0) {
      const verdict = this.stack[depth].ignores(relPath, isDir);
      if (verdict !== undefined) {
        return verdict;
      }
    }
    return false;
  }
}
|
|
10223
|
+
// Built-in ignore patterns in gitignore syntax. scanRoot() combines them with
// the caller's extra excludes into the "hard" matcher of its IgnoreStack.
// Entries ending in "/" are compiled as directory-only rules.
var DEFAULT_EXCLUDES = [
  ".git/",
  "node_modules/",
  "__pycache__/",
  ".venv/",
  "venv/",
  ".tox/",
  "dist/",
  "build/",
  "out/",
  ".next/",
  ".nuxt/",
  ".cache/",
  ".turbo/",
  "target/",
  "coverage/",
  ".pnpm-store/",
  ".bun/",
  ".DS_Store"
];
|
|
10243
|
+
|
|
10244
|
+
// src/lib/local/walker.ts
|
|
10245
|
+
// File extensions treated as binary. Entries are lower-case and have no
// leading dot; hasBinaryExtension() lower-cases its argument before the lookup.
var BINARY_EXTENSIONS = new Set([
  // images
  "png",
  "jpg",
  "jpeg",
  "gif",
  "webp",
  "avif",
  "ico",
  "bmp",
  "tiff",
  "heic",
  // documents
  "pdf",
  // archives and compressed files
  "zip",
  "gz",
  "tgz",
  "tar",
  "bz2",
  "xz",
  "zst",
  "7z",
  "rar",
  // executables, libraries and compiled artifacts
  "exe",
  "dll",
  "so",
  "dylib",
  "a",
  "o",
  "obj",
  "class",
  "jar",
  "war",
  "pyc",
  "wasm",
  "node",
  // fonts
  "woff",
  "woff2",
  "ttf",
  "otf",
  "eot",
  // audio and video
  "mp3",
  "mp4",
  "m4a",
  "avi",
  "mov",
  "mkv",
  "webm",
  "wav",
  "flac",
  "ogg",
  // databases and opaque data
  "sqlite",
  "db",
  "bin",
  "dat",
  "ds_store"
]);
|
|
10300
|
+
var CONTENT_EXCLUDED_NAMES = [/\.lock$/, /-lock\.(json|yaml)$/, /\.min\.(js|css)$/, /\.map$/, /\.svg$/];
|
|
10301
|
+
function hasBinaryExtension(ext) {
|
|
10302
|
+
return BINARY_EXTENSIONS.has(ext.toLowerCase());
|
|
10303
|
+
}
|
|
10304
|
+
function isContentExcluded(name) {
|
|
10305
|
+
return CONTENT_EXCLUDED_NAMES.some((re) => re.test(name));
|
|
10306
|
+
}
|
|
10307
|
+
/**
 * Sniffs the first 8 KiB of a file for a NUL byte.
 *
 * @param {string} absPath Path of the file to inspect.
 * @returns {boolean} `true` when a NUL byte is found in the sampled prefix or
 *   when the file cannot be opened; `false` otherwise (including empty files).
 */
function isBinaryFile(absPath) {
  let handle;
  try {
    handle = openSync(absPath, "r");
  } catch {
    // An unopenable file is reported as binary so callers do not try to read it as text.
    return true;
  }
  try {
    const sample = Buffer.alloc(8192);
    const bytesRead = readSync(handle, sample, 0, sample.length, 0);
    // Only the bytes actually read are inspected; the rest of the buffer is zero-filled.
    return sample.subarray(0, bytesRead).includes(0);
  } finally {
    closeSync(handle);
  }
}
|
|
10326
|
+
/**
 * Reads the `.gitignore` located directly inside `dirAbs`.
 *
 * @param {string} dirAbs Directory to look in.
 * @returns {string[] | null} The file's lines, or `null` when the file is
 *   missing or cannot be read.
 */
function readGitignore(dirAbs) {
  try {
    // Accept both LF and CRLF line endings so patterns from files saved on
    // Windows do not carry a trailing "\r".
    return readFileSync3(join2(dirAbs, ".gitignore"), "utf-8").split(/\r?\n/);
  } catch {
    // Best effort: an absent or unreadable .gitignore simply contributes no rules.
    return null;
  }
}
|
|
10334
|
+
/**
 * Extracts the lower-cased extension of a file name.
 *
 * @param {string} name Base name of the file.
 * @returns {string} Text after the last dot, lower-cased; `""` when there is
 *   no dot or the only dot is the first character (e.g. `.bashrc`).
 */
function extOf(name) {
  const dot = name.lastIndexOf(".");
  if (dot <= 0) {
    return "";
  }
  return name.substring(dot + 1).toLowerCase();
}
|
|
10338
|
+
/**
 * Recursively lists the regular files under `rootPath` that survive the
 * ignore rules: the default excludes, `extraExcludes`, and every `.gitignore`
 * encountered on the way down (each scoped to the directory it was found in).
 *
 * Symbolic links are skipped entirely and directories named `.git` are never
 * entered. A subdirectory that cannot be listed is skipped; unless the error
 * code is ENOENT its relative path is added to `skippedDirs`. Files that
 * cannot be stat'ed are silently left out.
 *
 * @param {string} rootPath Directory to scan.
 * @param {string[]} [extraExcludes] Extra ignore patterns appended to the defaults.
 * @returns {{files: Array<{relPath: string, name: string, ext: string, dir: string, size: number, mtimeMs: number}>, skippedDirs: string[]}}
 *   `files` is sorted ascending by `relPath`.
 * @throws {Error} When the root directory itself cannot be listed.
 */
function scanRoot(rootPath, extraExcludes = []) {
  const ignoreStack = new IgnoreStack(new IgnoreMatcher([...DEFAULT_EXCLUDES, ...extraExcludes]));
  const files = [];
  const skippedDirs = [];

  // Stats one file and appends its metadata; unreadable files are dropped.
  function recordFile(dirAbs, dirRel, name, relPath) {
    let info;
    try {
      info = statSync(join2(dirAbs, name));
    } catch {
      return;
    }
    files.push({
      relPath,
      name,
      ext: extOf(name),
      dir: dirRel,
      size: info.size,
      mtimeMs: Math.floor(info.mtimeMs)
    });
  }

  function visit(dirAbs, dirRel) {
    const rules = readGitignore(dirAbs);
    const pushedRules = Boolean(rules);
    if (pushedRules) {
      ignoreStack.push(new IgnoreMatcher(rules, dirRel));
    }
    let entries;
    try {
      entries = readdirSync(dirAbs, { withFileTypes: true });
    } catch (err) {
      // Undo this directory's rule layer before bailing out.
      if (pushedRules) {
        ignoreStack.pop();
      }
      if (dirRel === "") {
        throw new Error(`Cannot read index root ${dirAbs}: ${err instanceof Error ? err.message : err}`);
      }
      if (err.code !== "ENOENT") {
        skippedDirs.push(dirRel);
      }
      return;
    }
    for (const entry of entries) {
      if (entry.isSymbolicLink()) {
        continue;
      }
      const relPath = dirRel ? `${dirRel}/${entry.name}` : entry.name;
      if (entry.isDirectory()) {
        if (entry.name !== ".git" && !ignoreStack.ignores(relPath, true)) {
          visit(join2(dirAbs, entry.name), relPath);
        }
      } else if (entry.isFile()) {
        if (!ignoreStack.ignores(relPath, false)) {
          recordFile(dirAbs, dirRel, entry.name, relPath);
        }
      }
    }
    if (pushedRules) {
      ignoreStack.pop();
    }
  }

  visit(rootPath, "");
  files.sort((left, right) => {
    if (left.relPath < right.relPath) {
      return -1;
    }
    if (left.relPath > right.relPath) {
      return 1;
    }
    return 0;
  });
  return { files, skippedDirs };
}
|
|
10395
|
+
|
|
10396
|
+
// src/lib/local/indexer.ts
|
|
10397
|
+
/**
 * Converts an `index_roots` database row (snake_case columns) into the
 * camelCase root object used by the rest of the indexer.
 *
 * @param {object} row Row as returned by the database driver; `exclude` holds
 *   a JSON string and `content_indexing` a truthy/falsy flag.
 * @returns {object} Root with `exclude` parsed and `contentIndexing` coerced to boolean.
 * @throws {SyntaxError} When `row.exclude` is not valid JSON.
 */
function rowToRoot(row) {
  const {
    id,
    path,
    name,
    exclude,
    content_indexing: contentFlag,
    max_file_size: maxFileSize,
    status,
    error,
    file_count: fileCount,
    last_indexed_at: lastIndexedAt,
    last_duration_ms: lastDurationMs,
    created_at: createdAt
  } = row;
  return {
    id,
    path,
    name,
    exclude: JSON.parse(exclude),
    contentIndexing: Boolean(contentFlag),
    maxFileSize,
    status,
    error,
    fileCount,
    lastIndexedAt,
    lastDurationMs,
    createdAt
  };
}
|
|
10413
|
+
/**
 * Canonicalizes a user-supplied root path: expands a leading `~` or `~/` to
 * the home directory, strips trailing slashes, and resolves the result to an
 * absolute path.
 *
 * @param {string} path Path as typed by the user.
 * @returns {string} Absolute, normalized path.
 */
function normalizeRootPath(path) {
  let expanded = path;
  if (path === "~") {
    expanded = homedir2();
  } else if (path.startsWith("~/")) {
    expanded = `${homedir2()}/${path.slice(2)}`;
  }
  const withoutTrailingSlashes = expanded.replace(/\/+$/, "");
  // A path made only of slashes collapses to "", which must mean the filesystem root.
  return resolve(withoutTrailingSlashes || "/");
}
|
|
10417
|
+
/**
 * Returns every registered index root, ordered by path.
 *
 * @param {object} [db] Database handle; the shared index database is used when omitted.
 * @returns {object[]} Roots converted with `rowToRoot`.
 */
function listRoots(db) {
  const conn = db ?? getIndexDb();
  return conn.query("SELECT * FROM index_roots ORDER BY path").all().map(rowToRoot);
}
|
|
10422
|
+
/**
 * Looks up a single index root by id, by path, or by name.
 *
 * When more than one row matches, an id match wins over a path match, which
 * wins over a name match. The path comparison uses the normalized form of `ref`.
 *
 * @param {string} ref Root id, filesystem path, or root name.
 * @param {object} [db] Database handle; the shared index database is used when omitted.
 * @returns {object | null} The matching root, or `null` when nothing matches.
 */
function getRoot(ref, db) {
  const conn = db ?? getIndexDb();
  const lookup = conn.prepare(`SELECT *, CASE WHEN id = $ref THEN 0 WHEN path = $path THEN 1 ELSE 2 END AS priority
       FROM index_roots
       WHERE id = $ref OR path = $path OR name = $ref
       ORDER BY priority LIMIT 1`);
  const match = lookup.get({ $ref: ref, $path: normalizeRootPath(ref) });
  if (!match) {
    return null;
  }
  return rowToRoot(match);
}
|
|
10430
|
+
/**
 * Reports whether at least one index root has status `ready`.
 *
 * @param {object} [db] Database handle; the shared index database is used when omitted.
 * @returns {boolean}
 */
function hasReadyRoot(db) {
  const d = db ?? getIndexDb();
  const row = d.query("SELECT 1 FROM index_roots WHERE status = 'ready' LIMIT 1").get();
  // Drivers differ on the "no row" sentinel (null vs undefined); treat both as absent.
  return row != null;
}
|
|
10435
|
+
/**
 * Registers a directory as a new index root with status `pending`.
 *
 * @param {string} path Directory to register; normalized before use.
 * @param {object} [opts]
 * @param {string} [opts.name] Display name; defaults to the directory's base name.
 * @param {string[]} [opts.exclude] Extra ignore patterns; defaults to none.
 * @param {boolean} [opts.contentIndexing] Content indexing is on unless this is exactly `false`.
 * @param {number} [opts.maxFileSize] Size limit in bytes; defaults to 524288.
 * @param {object} [db] Database handle; the shared index database is used when omitted.
 * @returns {object | null} The stored root as read back from the database.
 * @throws {Error} When the path is missing or not a directory, `maxFileSize`
 *   is not a finite number >= 1, the path is already registered, or the name
 *   is already taken by another root.
 */
function addRoot(path, opts = {}, db) {
  const conn = db ?? getIndexDb();
  const rootPath = normalizeRootPath(path);

  let info;
  try {
    info = statSync2(rootPath);
  } catch {
    throw new Error(`Path does not exist: ${rootPath}`);
  }
  if (!info.isDirectory()) {
    throw new Error(`Not a directory: ${rootPath}`);
  }

  const { maxFileSize } = opts;
  if (maxFileSize !== undefined) {
    const isValidSize = Number.isFinite(maxFileSize) && maxFileSize >= 1;
    if (!isValidSize) {
      throw new Error("maxFileSize must be a positive number of bytes");
    }
  }

  // getRoot also matches by id and name, so confirm the hit is really this path.
  const registered = getRoot(rootPath, conn);
  if (registered && registered.path === rootPath) {
    throw new Error(`Root already indexed: ${registered.path} (${registered.id})`);
  }

  const name = opts.name ?? basename(rootPath);
  const nameOwner = conn.prepare("SELECT path FROM index_roots WHERE name = ?").get(name);
  if (nameOwner) {
    throw new Error(`Root name "${name}" already used by ${nameOwner.path} \u2014 pass a different name.`);
  }

  const id = generateId();
  const insert = conn.prepare(`INSERT INTO index_roots (id, path, name, exclude, content_indexing, max_file_size, status, created_at)
     VALUES (?, ?, ?, ?, ?, ?, 'pending', ?)`);
  const excludeJson = JSON.stringify(opts.exclude ?? []);
  const contentFlag = opts.contentIndexing === false ? 0 : 1;
  const sizeLimit = maxFileSize ?? 524288;
  insert.run(id, rootPath, name, excludeJson, contentFlag, sizeLimit, new Date().toISOString());
  return getRoot(id, conn);
}
|
|
10463
|
+
/**
 * Deletes an index root and the full-text content rows of its files inside a
 * single transaction.
 *
 * @param {string} idOrPath Root id, path, or name (resolved with `getRoot`).
 * @param {object} [db] Database handle; the shared index database is used when omitted.
 * @returns {boolean} `false` when no such root exists, `true` after a successful delete.
 * @throws Re-throws any database error after rolling the transaction back.
 */
function removeRoot(idOrPath, db) {
  const conn = db ?? getIndexDb();
  const target = getRoot(idOrPath, conn);
  if (!target) {
    return false;
  }
  conn.exec("BEGIN");
  try {
    // FTS rows are removed explicitly, keyed by the ids of this root's content-indexed files.
    const purgeContent = conn.prepare("DELETE FROM file_content_fts WHERE rowid IN (SELECT id FROM files WHERE root_id = ? AND content_indexed = 1)");
    purgeContent.run(target.id);
    const purgeRoot = conn.prepare("DELETE FROM index_roots WHERE id = ?");
    purgeRoot.run(target.id);
    conn.exec("COMMIT");
  } catch (failure) {
    conn.exec("ROLLBACK");
    throw failure;
  }
  return true;
}
|
|
10479
|
+
/**
 * Decides from metadata alone whether a file's content is a candidate for
 * full-text indexing: the root must have content indexing enabled, the file
 * must be non-empty and within the root's size limit, and neither its
 * extension nor its name may be on the exclusion lists.
 *
 * @param {{contentIndexing: boolean, maxFileSize: number}} root
 * @param {{size: number, ext: string, name: string}} file
 * @returns {boolean}
 */
function shouldIndexContent(root, file) {
  if (!root.contentIndexing) {
    return root.contentIndexing;
  }
  const sizeWithinLimit = file.size > 0 && file.size <= root.maxFileSize;
  if (!sizeWithinLimit) {
    return false;
  }
  if (hasBinaryExtension(file.ext)) {
    return false;
  }
  return !isContentExcluded(file.name);
}
|
|
10482
|
+
/**
 * Brings the stored file index of one root in line with the filesystem.
 *
 * The root is marked `indexing`, the directory is scanned, and then, inside a
 * single transaction, new files are inserted, files whose size or mtime
 * changed (or all files when `opts.force` is set) are updated, and rows for
 * files that disappeared are deleted. Text content is (re)written to the FTS
 * table for files that qualify. On success the root becomes `ready`; on any
 * failure the transaction is rolled back, the root is marked `error` with the
 * message, and the error is re-thrown.
 *
 * @param {string} idOrPath Root id, path, or name.
 * @param {{force?: boolean}} [opts] `force` re-processes unchanged files too.
 * @param {object} [db] Database handle; the shared index database is used when omitted.
 * @returns {{rootId: string, added: number, updated: number, deleted: number, contentIndexed: number, fileCount: number, skippedDirs: number, durationMs: number}}
 * @throws {Error} When the root does not exist, or when scanning or a database call fails.
 */
function indexRoot(idOrPath, opts = {}, db) {
  const conn = db ?? getIndexDb();
  const root = getRoot(idOrPath, conn);
  if (!root) {
    throw new Error(`Index root not found: ${idOrPath}`);
  }
  const startedAt = Date.now();
  conn.prepare("UPDATE index_roots SET status = 'indexing', error = NULL WHERE id = ?").run(root.id);
  try {
    const scan = scanRoot(root.path, root.exclude);
    const scanned = scan.files;
    const indexedAt = new Date().toISOString();

    // Previously indexed rows, keyed by relative path.
    const known = new Map;
    const knownRows = conn.prepare("SELECT id, rel_path, size, mtime_ms, content_indexed FROM files WHERE root_id = ?").all(root.id);
    for (const row of knownRows) {
      known.set(row.rel_path, row);
    }

    const sql = {
      insertFile: conn.prepare(`INSERT INTO files (root_id, rel_path, name, ext, dir, size, mtime_ms, is_binary, content_indexed, indexed_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`),
      updateFile: conn.prepare("UPDATE files SET size = ?, mtime_ms = ?, is_binary = ?, content_indexed = ?, indexed_at = ? WHERE id = ?"),
      deleteFile: conn.prepare("DELETE FROM files WHERE id = ?"),
      insertContent: conn.prepare("INSERT INTO file_content_fts (rowid, body) VALUES (?, ?)"),
      deleteContent: conn.prepare("DELETE FROM file_content_fts WHERE rowid = ?")
    };
    const stats = {
      rootId: root.id,
      added: 0,
      updated: 0,
      deleted: 0,
      contentIndexed: 0,
      fileCount: scanned.length,
      skippedDirs: scan.skippedDirs.length,
      durationMs: 0
    };

    // Determines the binary flag and, for indexable text files, loads the body.
    const classify = (file) => {
      const wantsContent = shouldIndexContent(root, file);
      const absPath = `${root.path}/${file.relPath}`;
      if (!wantsContent) {
        return { isBinary: hasBinaryExtension(file.ext), body: null };
      }
      if (isBinaryFile(absPath)) {
        return { isBinary: true, body: null };
      }
      try {
        return { isBinary: false, body: readFileSync4(absPath, "utf-8") };
      } catch {
        // A file that cannot be read as text is recorded as binary without content.
        return { isBinary: true, body: null };
      }
    };

    conn.exec("BEGIN");
    try {
      const seen = new Set;
      for (const file of scanned) {
        seen.add(file.relPath);
        const prev = known.get(file.relPath);
        const unchanged = Boolean(prev) && prev.size === file.size && prev.mtime_ms === file.mtimeMs;
        if (unchanged && !opts.force) {
          continue;
        }
        const { isBinary, body } = classify(file);
        const hasBody = body !== null;
        const binaryFlag = isBinary ? 1 : 0;
        const contentFlag = hasBody ? 1 : 0;
        if (prev) {
          // Replace any stale FTS row before writing the fresh body.
          if (prev.content_indexed) {
            sql.deleteContent.run(prev.id);
          }
          sql.updateFile.run(file.size, file.mtimeMs, binaryFlag, contentFlag, indexedAt, prev.id);
          if (hasBody) {
            sql.insertContent.run(prev.id, body);
          }
          stats.updated++;
        } else {
          const inserted = sql.insertFile.run(root.id, file.relPath, file.name, file.ext, file.dir, file.size, file.mtimeMs, binaryFlag, contentFlag, indexedAt);
          if (hasBody) {
            sql.insertContent.run(Number(inserted.lastInsertRowid), body);
          }
          stats.added++;
        }
        if (hasBody) {
          stats.contentIndexed++;
        }
      }

      // Anything still in the database but absent from the scan has been removed on disk.
      for (const [relPath, row] of known) {
        if (!seen.has(relPath)) {
          if (row.content_indexed) {
            sql.deleteContent.run(row.id);
          }
          sql.deleteFile.run(row.id);
          stats.deleted++;
        }
      }

      stats.durationMs = Date.now() - startedAt;
      conn.prepare("UPDATE index_roots SET status = 'ready', file_count = ?, last_indexed_at = ?, last_duration_ms = ? WHERE id = ?").run(scanned.length, indexedAt, stats.durationMs, root.id);
      conn.exec("COMMIT");
    } catch (txError) {
      conn.exec("ROLLBACK");
      throw txError;
    }
    return stats;
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : String(failure);
    conn.prepare("UPDATE index_roots SET status = 'error', error = ? WHERE id = ?").run(message, root.id);
    throw failure;
  }
}
|
|
10569
|
+
/**
 * Indexes every registered root in path order.
 *
 * @param {object} [opts] Options forwarded to `indexRoot` for each root.
 * @param {object} [db] Database handle forwarded to `listRoots` and `indexRoot`.
 * @returns {object[]} One stats object per root.
 * @throws Propagates the first `indexRoot` failure; later roots are not processed.
 */
function indexAllRoots(opts = {}, db) {
  const results = [];
  for (const root of listRoots(db)) {
    results.push(indexRoot(root.id, opts, db));
  }
  return results;
}
|
|
10572
|
+
// Ids of roots that are being refreshed right now, so a refresh is not started twice.
var refreshing = new Set;
/**
 * Re-indexes every root whose last successful index is older than
 * `staleMinutes` (or that has never been indexed).
 *
 * Roots with status `indexing` or `pending` are left alone. A failure while
 * indexing one root is swallowed here so the remaining roots are still
 * processed.
 *
 * @param {number} staleMinutes Age in minutes after which a root counts as stale.
 * @param {object} [db] Database handle forwarded to `listRoots` and `indexRoot`.
 * @returns {object[]} Stats for the roots that were refreshed successfully.
 */
function refreshStaleRoots(staleMinutes, db) {
  const staleBefore = Date.now() - staleMinutes * 60000;
  const refreshed = [];
  for (const root of listRoots(db)) {
    const busy = root.status === "indexing" || root.status === "pending";
    const fresh = Boolean(root.lastIndexedAt) && Date.parse(root.lastIndexedAt) > staleBefore;
    if (busy || fresh || refreshing.has(root.id)) {
      continue;
    }
    refreshing.add(root.id);
    try {
      refreshed.push(indexRoot(root.id, {}, db));
    } catch {
      // Deliberately best-effort: one failing root must not stop the others.
    } finally {
      refreshing.delete(root.id);
    }
  }
  return refreshed;
}
|
|
10592
|
+
/**
 * Refreshes stale roots according to the user's configuration.
 *
 * @param {object} [db] Database handle forwarded to `refreshStaleRoots`.
 * @returns {object[]} Stats of refreshed roots, or an empty array when
 *   `indexAutoRefresh` is disabled in the config.
 */
function autoRefreshStaleRoots(db) {
  const { indexAutoRefresh, indexStaleMinutes } = getConfig();
  return indexAutoRefresh ? refreshStaleRoots(indexStaleMinutes, db) : [];
}
|
|
10598
|
+
|
|
9817
10599
|
// src/db/providers.ts
|
|
9818
10600
|
function rowToProvider(row) {
|
|
9819
10601
|
return {
|
|
@@ -9873,6 +10655,8 @@ function updateProviderLastUsed(name, db) {
|
|
|
9873
10655
|
d.prepare("UPDATE providers SET last_used_at = ? WHERE name = ?").run(now, name);
|
|
9874
10656
|
}
|
|
9875
10657
|
function isProviderConfigured(provider) {
|
|
10658
|
+
if (LOCAL_PROVIDER_NAMES.has(provider.name))
|
|
10659
|
+
return hasReadyRoot();
|
|
9876
10660
|
if (!provider.apiKeyEnv)
|
|
9877
10661
|
return true;
|
|
9878
10662
|
return !!Bun.env[provider.apiKeyEnv];
|
|
@@ -9923,94 +10707,1641 @@ function deleteProfile(id, db) {
|
|
|
9923
10707
|
const result = d.prepare("DELETE FROM search_profiles WHERE id = ?").run(id);
|
|
9924
10708
|
return result.changes > 0;
|
|
9925
10709
|
}
|
|
9926
|
-
// src/lib/
|
|
9927
|
-
|
|
9928
|
-
|
|
9929
|
-
|
|
9930
|
-
|
|
9931
|
-
|
|
9932
|
-
|
|
9933
|
-
|
|
9934
|
-
|
|
9935
|
-
|
|
9936
|
-
|
|
10710
|
+
// src/lib/providers/google.ts
|
|
10711
|
+
/**
 * Google web search backed by SerpAPI (`engine=google`).
 * Requires the `SERP_API_KEY` environment variable.
 */
class GoogleProvider {
  name = "google";
  displayName = "Google";
  /** @returns {boolean} Whether `SERP_API_KEY` is set. */
  isConfigured() {
    return Boolean(Bun.env.SERP_API_KEY);
  }
  /**
   * Runs a Google search through SerpAPI.
   *
   * @param {string} query Search terms.
   * @param {{limit?: number, language?: string, region?: string, safeSearch?: boolean}} [options]
   *   `limit` defaults to 10; `language`/`region` map to `hl`/`gl`; `safeSearch` sends `safe=active`.
   * @returns {Promise<object[]>} Organic results; `score` decreases by 0.05 per rank starting at 1.
   * @throws {Error} When the key is missing, the HTTP status is not OK, or the payload carries `error`.
   */
  async search(query, options) {
    const apiKey = Bun.env.SERP_API_KEY;
    if (!apiKey) {
      throw new Error("SERP_API_KEY not configured");
    }
    const params = new URLSearchParams({
      api_key: apiKey,
      engine: "google",
      q: query,
      num: String(options?.limit ?? 10)
    });
    const optionalParams = [
      ["hl", options?.language],
      ["gl", options?.region],
      ["safe", options?.safeSearch ? "active" : undefined]
    ];
    for (const [key, value] of optionalParams) {
      if (value) {
        params.set(key, value);
      }
    }
    const response = await fetch(`https://serpapi.com/search.json?${params}`);
    if (!response.ok) {
      throw new Error(`SerpAPI error: ${response.status} ${response.statusText}`);
    }
    const payload = await response.json();
    if (payload.error) {
      throw new Error(`SerpAPI error: ${payload.error}`);
    }
    const organic = payload.organic_results ?? [];
    return organic.map((hit, rank) => ({
      title: hit.title,
      url: hit.link,
      snippet: hit.snippet ?? "",
      score: 1 - rank * 0.05,
      publishedAt: hit.date ?? undefined,
      thumbnail: hit.thumbnail ?? undefined,
      metadata: { position: hit.position ?? rank + 1 }
    }));
  }
}
|
|
9944
|
-
|
|
9945
|
-
|
|
9946
|
-
|
|
9947
|
-
|
|
10750
|
+
|
|
10751
|
+
// src/lib/providers/serpapi.ts
|
|
10752
|
+
/**
 * Generic SerpAPI provider; the search engine is chosen at construction time.
 * Requires the `SERP_API_KEY` environment variable.
 */
class SerpApiProvider {
  name = "serpapi";
  displayName = "SerpAPI";
  engine;
  /** @param {string} [engine] SerpAPI engine identifier; defaults to `"google"`. */
  constructor(engine = "google") {
    this.engine = engine;
  }
  /** @returns {boolean} Whether `SERP_API_KEY` is set. */
  isConfigured() {
    return Boolean(Bun.env.SERP_API_KEY);
  }
  /**
   * Runs a search on the configured engine.
   *
   * @param {string} query Search terms.
   * @param {{limit?: number, language?: string, region?: string}} [options]
   *   `limit` defaults to 10; `language`/`region` map to `hl`/`gl`.
   * @returns {Promise<object[]>} Organic results; each carries the engine name in `metadata`.
   * @throws {Error} When the key is missing, the HTTP status is not OK, or the payload carries `error`.
   */
  async search(query, options) {
    const apiKey = Bun.env.SERP_API_KEY;
    if (!apiKey) {
      throw new Error("SERP_API_KEY not configured");
    }
    const { engine } = this;
    const params = new URLSearchParams({
      api_key: apiKey,
      engine,
      q: query,
      num: String(options?.limit ?? 10)
    });
    const language = options?.language;
    if (language) {
      params.set("hl", language);
    }
    const region = options?.region;
    if (region) {
      params.set("gl", region);
    }
    const response = await fetch(`https://serpapi.com/search.json?${params}`);
    if (!response.ok) {
      throw new Error(`SerpAPI error: ${response.status} ${response.statusText}`);
    }
    const payload = await response.json();
    if (payload.error) {
      throw new Error(`SerpAPI error: ${payload.error}`);
    }
    const toResult = (hit, rank) => ({
      title: hit.title,
      url: hit.link,
      snippet: hit.snippet ?? "",
      score: 1 - rank * 0.05,
      publishedAt: hit.date ?? undefined,
      thumbnail: hit.thumbnail ?? undefined,
      metadata: { engine: this.engine, position: hit.position ?? rank + 1 }
    });
    return (payload.organic_results ?? []).map(toResult);
  }
}
|
|
9957
|
-
|
|
9958
|
-
|
|
9959
|
-
|
|
9960
|
-
|
|
9961
|
-
|
|
9962
|
-
|
|
10793
|
+
|
|
10794
|
+
// src/lib/providers/exa.ts
|
|
10795
|
+
/**
 * Exa.ai search provider. Requires the `EXA_API_KEY` environment variable.
 */
class ExaProvider {
  name = "exa";
  displayName = "Exa.ai";
  /** @returns {boolean} Whether `EXA_API_KEY` is set. */
  isConfigured() {
    return !!Bun.env.EXA_API_KEY;
  }
  /**
   * Queries the Exa search endpoint and requests up to 500 characters of text per hit.
   *
   * @param {string} query Search terms.
   * @param {{limit?: number, dateRange?: {from?: string, to?: string}}} [options]
   *   `limit` defaults to 10; the date range is sent as start/end published dates.
   * @returns {Promise<object[]>} Normalized results; empty when the response has no `results`.
   * @throws {Error} When the key is missing or the HTTP status is not OK.
   */
  async search(query, options) {
    const apiKey = Bun.env.EXA_API_KEY;
    if (!apiKey)
      throw new Error("EXA_API_KEY not configured");
    const body = {
      query,
      numResults: options?.limit ?? 10,
      type: "auto",
      contents: {
        text: { maxCharacters: 500 }
      }
    };
    if (options?.dateRange?.from)
      body.startPublishedDate = options.dateRange.from;
    if (options?.dateRange?.to)
      body.endPublishedDate = options.dateRange.to;
    const res = await fetch("https://api.exa.ai/search", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-api-key": apiKey
      },
      body: JSON.stringify(body)
    });
    if (!res.ok)
      throw new Error(`Exa API error: ${res.status} ${res.statusText}`);
    const data = await res.json();
    // Tolerate a payload without `results`, as the other providers do for their result arrays.
    return (data.results ?? []).map((r) => ({
      title: r.title,
      url: r.url,
      snippet: r.text ?? "",
      score: r.score,
      publishedAt: r.publishedDate ?? undefined,
      thumbnail: r.image ?? undefined,
      metadata: { author: r.author }
    }));
  }
}
|
|
9964
|
-
|
|
9965
|
-
|
|
9966
|
-
|
|
9967
|
-
|
|
10839
|
+
|
|
10840
|
+
// src/lib/providers/perplexity.ts
|
|
10841
|
+
/**
 * Perplexity provider: asks the `sonar` chat model and turns the returned
 * citations into search results. Requires `PERPLEXITY_API_KEY`.
 */
class PerplexityProvider {
  name = "perplexity";
  displayName = "Perplexity";
  /** @returns {boolean} Whether `PERPLEXITY_API_KEY` is set. */
  isConfigured() {
    return !!Bun.env.PERPLEXITY_API_KEY;
  }
  /**
   * Sends the query as a chat completion and maps each citation to a result.
   *
   * @param {string} query Question or search terms.
   * @param {object} [_options] Accepted for interface compatibility; not used.
   * @returns {Promise<object[]>} One result per citation. Citations may be plain
   *   URL strings or objects with `url`/`title`/`snippet`; missing pieces fall
   *   back to the URL and to the first 300 characters of the answer.
   * @throws {Error} When the key is missing or the HTTP status is not OK.
   */
  async search(query, _options) {
    const apiKey = Bun.env.PERPLEXITY_API_KEY;
    if (!apiKey)
      throw new Error("PERPLEXITY_API_KEY not configured");
    const res = await fetch("https://api.perplexity.ai/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`
      },
      body: JSON.stringify({
        model: "sonar",
        messages: [
          {
            role: "system",
            content: "You are a search assistant. Provide factual, well-sourced answers with citations."
          },
          { role: "user", content: query }
        ],
        return_citations: true
      })
    });
    if (!res.ok)
      throw new Error(`Perplexity API error: ${res.status} ${res.statusText}`);
    const data = await res.json();
    // `choices` may be absent from the payload; treat that as an empty answer instead of throwing.
    const content = data.choices?.[0]?.message?.content ?? "";
    const citations = data.citations ?? [];
    return citations.map((citation, i) => {
      const isString = typeof citation === "string";
      const url = isString ? citation : citation.url;
      const title = isString ? url : citation.title ?? url;
      const snippet = isString ? content.substring(0, 300) : citation.snippet ?? content.substring(0, 300);
      return {
        title,
        url,
        snippet,
        score: 1 - i * 0.1,
        metadata: { aiSummary: content.substring(0, 500) }
      };
    });
  }
}
|
|
9969
|
-
|
|
9970
|
-
|
|
9971
|
-
|
|
10889
|
+
|
|
10890
|
+
// src/lib/providers/brave.ts
|
|
10891
|
+
/**
 * Brave Search web provider. Requires the `BRAVE_API_KEY` environment variable.
 */
class BraveProvider {
  name = "brave";
  displayName = "Brave Search";
  /** @returns {boolean} Whether `BRAVE_API_KEY` is set. */
  isConfigured() {
    return Boolean(Bun.env.BRAVE_API_KEY);
  }
  /**
   * Queries the Brave web search endpoint.
   *
   * @param {string} query Search terms.
   * @param {{limit?: number, safeSearch?: boolean, language?: string, region?: string}} [options]
   *   `limit` defaults to 10; `safeSearch` sends `safesearch=strict`;
   *   `language`/`region` map to `search_lang`/`country`.
   * @returns {Promise<object[]>} Web results; `score` decreases by 0.05 per rank starting at 1.
   * @throws {Error} When the key is missing or the HTTP status is not OK.
   */
  async search(query, options) {
    const apiKey = Bun.env.BRAVE_API_KEY;
    if (!apiKey) {
      throw new Error("BRAVE_API_KEY not configured");
    }
    const params = new URLSearchParams({
      q: query,
      count: String(options?.limit ?? 10)
    });
    const optionalParams = [
      ["safesearch", options?.safeSearch ? "strict" : undefined],
      ["search_lang", options?.language],
      ["country", options?.region]
    ];
    for (const [key, value] of optionalParams) {
      if (value) {
        params.set(key, value);
      }
    }
    const headers = {
      Accept: "application/json",
      "Accept-Encoding": "gzip",
      "X-Subscription-Token": apiKey
    };
    const response = await fetch(`https://api.search.brave.com/res/v1/web/search?${params}`, { headers });
    if (!response.ok) {
      throw new Error(`Brave Search API error: ${response.status} ${response.statusText}`);
    }
    const payload = await response.json();
    const hits = payload.web?.results ?? [];
    return hits.map((hit, rank) => ({
      title: hit.title,
      url: hit.url,
      snippet: hit.description ?? "",
      score: 1 - rank * 0.05,
      publishedAt: hit.age ?? undefined,
      thumbnail: hit.thumbnail?.src ?? undefined,
      metadata: {}
    }));
  }
}
|
|
9973
|
-
|
|
10932
|
+
|
|
10933
|
+
// src/lib/providers/bing.ts
|
|
10934
|
+
/**
 * Bing Web Search (v7) provider. Requires the `BING_API_KEY` environment variable.
 */
class BingProvider {
  name = "bing";
  displayName = "Bing";
  /** @returns {boolean} Whether `BING_API_KEY` is set. */
  isConfigured() {
    return Boolean(Bun.env.BING_API_KEY);
  }
  /**
   * Queries Bing for web pages only.
   *
   * @param {string} query Search terms.
   * @param {{limit?: number, offset?: number, safeSearch?: boolean, region?: string}} [options]
   *   `limit` defaults to 10; `safeSearch` sends `safeSearch=Strict`; `region` maps to `mkt`.
   * @returns {Promise<object[]>} Web page results; each carries the total estimated match count in `metadata`.
   * @throws {Error} When the key is missing, the HTTP status is not OK, or the payload carries `error`.
   */
  async search(query, options) {
    const apiKey = Bun.env.BING_API_KEY;
    if (!apiKey) {
      throw new Error("BING_API_KEY not configured");
    }
    const params = new URLSearchParams({
      q: query,
      count: String(options?.limit ?? 10),
      responseFilter: "Webpages"
    });
    const offset = options?.offset;
    if (offset) {
      params.set("offset", String(offset));
    }
    if (options?.safeSearch) {
      params.set("safeSearch", "Strict");
    }
    const market = options?.region;
    if (market) {
      params.set("mkt", market);
    }
    const response = await fetch(`https://api.bing.microsoft.com/v7.0/search?${params}`, {
      headers: { "Ocp-Apim-Subscription-Key": apiKey }
    });
    if (!response.ok) {
      throw new Error(`Bing API error: ${response.status} ${response.statusText}`);
    }
    const payload = await response.json();
    if (payload.error) {
      throw new Error(`Bing API error: ${payload.error.message}`);
    }
    const pages = payload.webPages?.value ?? [];
    return pages.map((page, rank) => ({
      title: page.name,
      url: page.url,
      snippet: page.snippet ?? "",
      score: 1 - rank * 0.05,
      publishedAt: page.dateLastCrawled ?? undefined,
      thumbnail: page.thumbnailUrl ?? undefined,
      metadata: {
        totalEstimatedMatches: payload.webPages?.totalEstimatedMatches
      }
    }));
  }
}
|
|
10978
|
+
|
|
10979
|
+
// src/lib/providers/twitter.ts
|
|
10980
|
+
/**
 * X / Twitter recent-search provider (API v2). Requires `X_BEARER_TOKEN`.
 */
class TwitterProvider {
  name = "twitter";
  displayName = "X / Twitter";
  /** @returns {boolean} Whether `X_BEARER_TOKEN` is set. */
  isConfigured() {
    return !!Bun.env.X_BEARER_TOKEN;
  }
  /**
   * Searches recent tweets.
   *
   * @param {string} query Search query.
   * @param {{limit?: number, dateRange?: {from?: string, to?: string}}} [options]
   *   `limit` defaults to 10. The date range is sent as ISO `start_time`/`end_time`.
   * @returns {Promise<object[]>} At most `limit` results (and never more than
   *   100). `score` is derived from likes and retweets when metrics are
   *   present, otherwise from rank.
   * @throws {Error} When the token is missing, the HTTP status is not OK, or the payload lists errors.
   */
  async search(query, options) {
    const token = Bun.env.X_BEARER_TOKEN;
    if (!token)
      throw new Error("X_BEARER_TOKEN not configured");
    const limit = options?.limit ?? 10;
    // The recent-search endpoint only accepts max_results between 10 and 100,
    // so request at least 10 and trim the response to the caller's limit below.
    const pageSize = Math.min(Math.max(limit, 10), 100);
    const params = new URLSearchParams({
      query,
      max_results: String(pageSize),
      "tweet.fields": "created_at,public_metrics,author_id",
      expansions: "author_id",
      "user.fields": "name,username"
    });
    if (options?.dateRange?.from)
      params.set("start_time", new Date(options.dateRange.from).toISOString());
    if (options?.dateRange?.to)
      params.set("end_time", new Date(options.dateRange.to).toISOString());
    const res = await fetch(`https://api.twitter.com/2/tweets/search/recent?${params}`, {
      headers: { Authorization: `Bearer ${token}` }
    });
    if (!res.ok)
      throw new Error(`Twitter API error: ${res.status} ${res.statusText}`);
    const data = await res.json();
    if (data.errors?.length)
      throw new Error(`Twitter API error: ${data.errors[0].message}`);
    // Index expanded user objects by id so each tweet can be joined to its author.
    const users = new Map;
    for (const u of data.includes?.users ?? []) {
      users.set(u.id, u);
    }
    return (data.data ?? []).slice(0, Math.max(limit, 0)).map((tweet, i) => {
      const user = tweet.author_id ? users.get(tweet.author_id) : undefined;
      const username = user?.username ?? "unknown";
      return {
        title: tweet.text.substring(0, 100) + (tweet.text.length > 100 ? "..." : ""),
        url: `https://x.com/${username}/status/${tweet.id}`,
        snippet: tweet.text,
        score: tweet.public_metrics ? (tweet.public_metrics.like_count + tweet.public_metrics.retweet_count * 2) / 1000 : 1 - i * 0.05,
        publishedAt: tweet.created_at,
        metadata: {
          authorName: user?.name,
          authorUsername: username,
          metrics: tweet.public_metrics
        }
      };
    });
  }
}
|
|
11031
|
+
|
|
11032
|
+
// src/lib/providers/reddit.ts
|
|
11033
|
+
// Last application-only OAuth token together with its (early) expiry timestamp in ms.
var cachedToken = null;
/**
 * Returns a Reddit application-only OAuth token, reusing the cached one until
 * one minute before it expires.
 *
 * @returns {Promise<string>} Bearer token.
 * @throws {Error} When `REDDIT_CLIENT_ID`/`REDDIT_CLIENT_SECRET` are missing,
 *   the token endpoint answers with a non-OK status, or the response does not
 *   contain an `access_token`.
 */
async function getOAuthToken() {
  if (cachedToken && Date.now() < cachedToken.expiresAt) {
    return cachedToken.token;
  }
  const clientId = Bun.env.REDDIT_CLIENT_ID;
  const clientSecret = Bun.env.REDDIT_CLIENT_SECRET;
  if (!clientId || !clientSecret)
    throw new Error("REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET required");
  const credentials = btoa(`${clientId}:${clientSecret}`);
  const res = await fetch("https://www.reddit.com/api/v1/access_token", {
    method: "POST",
    headers: {
      Authorization: `Basic ${credentials}`,
      "Content-Type": "application/x-www-form-urlencoded",
      "User-Agent": "open-search/0.0.1"
    },
    body: "grant_type=client_credentials"
  });
  if (!res.ok)
    throw new Error(`Reddit OAuth error: ${res.status}`);
  const data = await res.json();
  // A successful status can still carry an error body; never cache or return an absent token.
  if (!data.access_token) {
    throw new Error(`Reddit OAuth error: ${data.error ?? "response contained no access_token"}`);
  }
  cachedToken = {
    token: data.access_token,
    // Expire one minute early so a token is not used right at its deadline.
    expiresAt: Date.now() + data.expires_in * 1000 - 60000
  };
  return cachedToken.token;
}
|
|
11061
|
+
|
|
11062
|
+
/**
 * Reddit post search through the OAuth API. Requires `REDDIT_CLIENT_ID` and
 * `REDDIT_CLIENT_SECRET`.
 */
class RedditProvider {
  name = "reddit";
  displayName = "Reddit";
  /** @returns {boolean} Whether both Reddit credentials are set. */
  isConfigured() {
    const { REDDIT_CLIENT_ID: clientId, REDDIT_CLIENT_SECRET: clientSecret } = Bun.env;
    return Boolean(clientId) && Boolean(clientSecret);
  }
  /**
   * Searches link posts sorted by relevance.
   *
   * @param {string} query Search terms.
   * @param {{limit?: number}} [options] `limit` defaults to 10.
   * @returns {Promise<object[]>} One result per post; `score` is the post score divided by 1000.
   * @throws {Error} When no token can be obtained, the HTTP status is not OK, or the payload carries `error`.
   */
  async search(query, options) {
    const token = await getOAuthToken();
    const params = new URLSearchParams({
      q: query,
      limit: String(options?.limit ?? 10),
      sort: "relevance",
      type: "link"
    });
    const headers = {
      Authorization: `Bearer ${token}`,
      "User-Agent": "open-search/0.0.1"
    };
    const response = await fetch(`https://oauth.reddit.com/search?${params}`, { headers });
    if (!response.ok) {
      throw new Error(`Reddit API error: ${response.status} ${response.statusText}`);
    }
    const listing = await response.json();
    if (listing.error) {
      throw new Error(`Reddit API error: ${listing.message ?? listing.error}`);
    }
    const children = listing.data?.children ?? [];
    return children.map(({ data: post }) => {
      // Self posts report the literal thumbnail value "self", which is not an image URL.
      const hasThumbnail = post.thumbnail && post.thumbnail !== "self";
      return {
        title: post.title,
        url: `https://www.reddit.com${post.permalink}`,
        snippet: post.selftext?.substring(0, 300) ?? "",
        score: post.score / 1000,
        publishedAt: new Date(post.created_utc * 1000).toISOString(),
        thumbnail: hasThumbnail ? post.thumbnail : undefined,
        metadata: {
          subreddit: post.subreddit,
          upvotes: post.score,
          comments: post.num_comments,
          author: post.author,
          originalUrl: post.url
        }
      };
    });
  }
}
|
|
11107
|
+
|
|
11108
|
+
// src/lib/providers/youtube.ts
|
|
11109
|
+
/**
 * YouTube video search (Data API v3). Requires the `YOUTUBE_API_KEY`
 * environment variable.
 */
class YouTubeProvider {
  name = "youtube";
  displayName = "YouTube";
  /** @returns {boolean} Whether `YOUTUBE_API_KEY` is set. */
  isConfigured() {
    return Boolean(Bun.env.YOUTUBE_API_KEY);
  }
  /**
   * Searches videos ordered by relevance.
   *
   * @param {string} query Search terms.
   * @param {{limit?: number, language?: string, safeSearch?: boolean, dateRange?: {from?: string, to?: string}}} [options]
   *   `limit` defaults to 10; `language` maps to `relevanceLanguage`; the date
   *   range is sent as ISO `publishedAfter`/`publishedBefore`.
   * @returns {Promise<object[]>} One result per item that has a video id.
   * @throws {Error} When the key is missing, the HTTP status is not OK, or the payload carries `error`.
   */
  async search(query, options) {
    const apiKey = Bun.env.YOUTUBE_API_KEY;
    if (!apiKey) {
      throw new Error("YOUTUBE_API_KEY not configured");
    }
    const params = new URLSearchParams({
      part: "snippet",
      q: query,
      type: "video",
      maxResults: String(options?.limit ?? 10),
      key: apiKey,
      order: "relevance"
    });
    const language = options?.language;
    if (language) {
      params.set("relevanceLanguage", language);
    }
    if (options?.safeSearch) {
      params.set("safeSearch", "strict");
    }
    const from = options?.dateRange?.from;
    if (from) {
      params.set("publishedAfter", new Date(from).toISOString());
    }
    const to = options?.dateRange?.to;
    if (to) {
      params.set("publishedBefore", new Date(to).toISOString());
    }
    const response = await fetch(`https://www.googleapis.com/youtube/v3/search?${params}`);
    if (!response.ok) {
      throw new Error(`YouTube API error: ${response.status} ${response.statusText}`);
    }
    const payload = await response.json();
    if (payload.error) {
      throw new Error(`YouTube API error: ${payload.error.message}`);
    }
    const videos = (payload.items ?? []).filter((item) => item.id.videoId);
    return videos.map((item, rank) => {
      const { snippet } = item;
      const thumbs = snippet.thumbnails;
      return {
        title: snippet.title,
        url: `https://www.youtube.com/watch?v=${item.id.videoId}`,
        snippet: snippet.description,
        score: 1 - rank * 0.05,
        publishedAt: snippet.publishedAt,
        thumbnail: thumbs?.high?.url ?? thumbs?.medium?.url ?? undefined,
        metadata: {
          videoId: item.id.videoId,
          channelTitle: snippet.channelTitle,
          totalResults: payload.pageInfo?.totalResults
        }
      };
    });
  }
}
|
|
11156
|
+
|
|
11157
|
+
// src/lib/providers/hackernews.ts
|
|
11158
|
+
/**
 * Search provider backed by the public Hacker News Algolia search API.
 * No credentials are required.
 */
class HackerNewsProvider {
  name = "hackernews";
  displayName = "Hacker News";

  /** @returns {boolean} Always true; the API needs no key. */
  isConfigured() {
    return true;
  }

  /**
   * Searches Hacker News stories.
   *
   * @param {string} query - Free-text search query.
   * @param {object} [options] - Reads `limit`, `dateRange.from` and `dateRange.to`.
   * @returns {Promise<object[]>} One result per story; score is points / 100.
   * @throws {Error} When the HTTP response is not OK.
   */
  async search(query, options) {
    const params = new URLSearchParams({
      query,
      hitsPerPage: String(options?.limit ?? 10),
      tags: "story"
    });

    // Algolia takes several numeric filters as a comma-separated list.
    const toEpochSeconds = (value) => Math.floor(new Date(value).getTime() / 1000);
    const numericFilters = [];
    if (options?.dateRange?.from)
      numericFilters.push(`created_at_i>${toEpochSeconds(options.dateRange.from)}`);
    if (options?.dateRange?.to)
      numericFilters.push(`created_at_i<${toEpochSeconds(options.dateRange.to)}`);
    if (numericFilters.length > 0)
      params.set("numericFilters", numericFilters.join(","));

    const res = await fetch(`https://hn.algolia.com/api/v1/search?${params}`);
    if (!res.ok)
      throw new Error(`HN Algolia API error: ${res.status} ${res.statusText}`);
    const data = await res.json();

    // A payload without `hits` yields no results instead of a TypeError.
    return (data.hits ?? []).map((hit) => {
      const hnUrl = `https://news.ycombinator.com/item?id=${hit.objectID}`;
      return {
        title: hit.title ?? hit.comment_text?.substring(0, 100) ?? "Untitled",
        // Text posts have no external URL; fall back to the discussion page.
        url: hit.url ?? hnUrl,
        snippet: hit.story_text?.substring(0, 300) ?? hit.comment_text?.substring(0, 300) ?? "",
        score: (hit.points ?? 0) / 100,
        publishedAt: hit.created_at,
        metadata: {
          hnUrl,
          author: hit.author,
          points: hit.points,
          comments: hit.num_comments,
          tags: hit._tags,
          totalHits: data.nbHits
        }
      };
    });
  }
}
|
|
11197
|
+
|
|
11198
|
+
// src/lib/providers/github.ts
|
|
11199
|
+
/**
 * Search provider that combines GitHub code search and repository search.
 * Needs the `GITHUB_TOKEN` environment variable.
 */
class GitHubProvider {
  name = "github";
  displayName = "GitHub";

  /** @returns {boolean} true when `GITHUB_TOKEN` is set in the environment. */
  isConfigured() {
    return !!Bun.env.GITHUB_TOKEN;
  }

  /**
   * Runs code and repository searches in parallel and merges them
   * (code results first), truncated to `limit`.
   *
   * @param {string} query - GitHub search query.
   * @param {object} [options] - Reads `limit` (default 10).
   * @returns {Promise<object[]>} Merged results; a failed sub-search contributes nothing.
   * @throws {Error} When `GITHUB_TOKEN` is not configured.
   */
  async search(query, options) {
    const token = Bun.env.GITHUB_TOKEN;
    if (!token)
      throw new Error("GITHUB_TOKEN not configured");
    const limit = options?.limit ?? 10;
    const headers = {
      Authorization: `Bearer ${token}`,
      Accept: "application/vnd.github+json",
      "X-GitHub-Api-Version": "2022-11-28"
    };
    // Best effort: one endpoint failing must not hide the other's results.
    const [codeResults, repoResults] = await Promise.allSettled([
      this.searchCode(query, Math.ceil(limit / 2), headers),
      this.searchRepos(query, Math.ceil(limit / 2), headers)
    ]);
    const results = [];
    if (codeResults.status === "fulfilled")
      results.push(...codeResults.value);
    if (repoResults.status === "fulfilled")
      results.push(...repoResults.value);
    return results.slice(0, limit);
  }

  /**
   * Queries `/search/code`, requesting text-match fragments for snippets.
   *
   * @param {string} query - Search query.
   * @param {number} limit - Page size to request.
   * @param {object} headers - Base request headers (auth, API version).
   * @returns {Promise<object[]>} Code hits; empty on a non-OK response or missing `items`.
   */
  async searchCode(query, limit, headers) {
    const params = new URLSearchParams({
      q: query,
      per_page: String(limit)
    });
    const res = await fetch(`https://api.github.com/search/code?${params}`, {
      headers: {
        ...headers,
        // Overrides the base Accept so the response carries `text_matches`.
        Accept: "application/vnd.github.text-match+json"
      }
    });
    if (!res.ok)
      return [];
    const data = await res.json();
    return (data.items ?? []).map((item) => ({
      title: `${item.repository.full_name}/${item.path}`,
      url: item.html_url,
      snippet: item.text_matches?.[0]?.fragment ?? `Code match in ${item.path}`,
      score: (item.repository.stargazers_count ?? 0) / 1e4,
      metadata: {
        type: "code",
        repo: item.repository.full_name,
        path: item.path,
        language: item.repository.language,
        stars: item.repository.stargazers_count
      }
    }));
  }

  /**
   * Queries `/search/repositories`, sorted by stars descending.
   *
   * @param {string} query - Search query.
   * @param {number} limit - Page size to request.
   * @param {object} headers - Request headers (auth, API version).
   * @returns {Promise<object[]>} Repository hits; empty on a non-OK response or missing `items`.
   */
  async searchRepos(query, limit, headers) {
    const params = new URLSearchParams({
      q: query,
      per_page: String(limit),
      sort: "stars",
      order: "desc"
    });
    const res = await fetch(`https://api.github.com/search/repositories?${params}`, {
      headers
    });
    if (!res.ok)
      return [];
    const data = await res.json();
    return (data.items ?? []).map((item) => ({
      title: item.full_name,
      url: item.html_url,
      snippet: item.description ?? "",
      // Missing star count scores 0 instead of NaN, which would break sorting.
      score: (item.stargazers_count ?? 0) / 1e4,
      publishedAt: item.updated_at,
      metadata: {
        type: "repository",
        stars: item.stargazers_count,
        language: item.language,
        topics: item.topics
      }
    }));
  }
}
|
|
11282
|
+
|
|
11283
|
+
// src/lib/providers/arxiv.ts
|
|
11284
|
+
/**
 * Search provider backed by the public arXiv export API (Atom feed).
 * No credentials are required.
 */
class ArxivProvider {
  name = "arxiv";
  displayName = "arXiv";

  /** @returns {boolean} Always true; the API needs no key. */
  isConfigured() {
    return true;
  }

  /**
   * Searches arXiv across all fields.
   *
   * @param {string} query - Free-text query, sent as `all:<query>`.
   * @param {object} [options] - Reads `limit` (default 10) and `offset` (default 0).
   * @returns {Promise<object[]>} Parsed feed entries.
   * @throws {Error} When the HTTP response is not OK.
   */
  async search(query, options) {
    const limit = options?.limit ?? 10;
    const offset = options?.offset ?? 0;
    const params = new URLSearchParams({
      search_query: `all:${query}`,
      start: String(offset),
      max_results: String(limit),
      sortBy: "relevance",
      sortOrder: "descending"
    });
    const res = await fetch(`https://export.arxiv.org/api/query?${params}`);
    if (!res.ok)
      throw new Error(`arXiv API error: ${res.status} ${res.statusText}`);
    const xml = await res.text();
    return this.parseAtomFeed(xml);
  }

  /**
   * Extracts results from an Atom feed using string/regex matching (no XML parser).
   *
   * @param {string} xml - Raw Atom feed text.
   * @returns {object[]} One result per `<entry>`, scored by position (never below 0).
   */
  parseAtomFeed(xml) {
    const results = [];
    // Index 0 is the feed preamble before the first <entry>.
    const entries = xml.split("<entry>");
    const tidy = (raw) => this.decodeXmlEntities(raw).replace(/\s+/g, " ").trim();
    for (let i = 1; i < entries.length; i++) {
      const entry = entries[i];
      const rawTitle = this.extractTag(entry, "title");
      const rawSummary = this.extractTag(entry, "summary");
      const title = rawTitle === null ? "Untitled" : tidy(rawTitle);
      const summary = rawSummary === null ? "" : tidy(rawSummary);
      const published = this.extractTag(entry, "published") ?? undefined;
      const idUrl = this.extractTag(entry, "id") ?? "";

      const authors = [];
      for (const match of entry.matchAll(/<author>\s*<name>([^<]+)<\/name>/g)) {
        if (match[1])
          authors.push(this.decodeXmlEntities(match[1].trim()));
      }

      const categories = [];
      for (const match of entry.matchAll(/category[^>]*term="([^"]+)"/g)) {
        if (match[1])
          categories.push(match[1]);
      }

      const pdfMatch = entry.match(/link[^>]*href="([^"]+)"[^>]*title="pdf"/);
      const pdfUrl = pdfMatch?.[1];

      results.push({
        title,
        url: idUrl,
        snippet: summary.substring(0, 400),
        // Positional relevance, clamped so long result lists never score negative.
        score: Math.max(0, 1 - (i - 1) * 0.05),
        publishedAt: published,
        metadata: {
          authors,
          categories,
          pdfUrl,
          abstract: summary
        }
      });
    }
    return results;
  }

  /**
   * Returns the inner text of the first `<tag ...>...</tag>` in `xml`.
   *
   * @param {string} xml - Text to search.
   * @param {string} tag - Element name.
   * @returns {string|null} Inner text, or null when the tag is absent.
   */
  extractTag(xml, tag) {
    const regex = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`);
    const match = xml.match(regex);
    return match?.[1] ?? null;
  }

  /**
   * Decodes the five predefined XML entities and numeric character references.
   * A single pass avoids double-decoding (`&amp;lt;` becomes `&lt;`).
   *
   * @param {string} text - Text that may contain entity references.
   * @returns {string} Decoded text; unrecognised references are left untouched.
   */
  decodeXmlEntities(text) {
    const named = { lt: "<", gt: ">", quot: '"', apos: "'", amp: "&" };
    return text.replace(/&(#x[0-9a-fA-F]+|#\d+|lt|gt|quot|apos|amp);/g, (whole, ref) => {
      if (ref[0] !== "#")
        return named[ref];
      const code = ref[1] === "x" ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
      return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    });
  }
}
|
|
11351
|
+
|
|
11352
|
+
// src/lib/local/query.ts
|
|
11353
|
+
import { existsSync as existsSync3, readFileSync as readFileSync5 } from "fs";
|
|
11354
|
+
|
|
11355
|
+
// src/lib/local/regex.ts
|
|
11356
|
+
/**
 * Collects, per top-level alternative of `pattern`, the literal substrings
 * (3+ characters) reported by `extractSequenceLiterals`.
 *
 * @param {string} pattern - Regular expression source.
 * @returns {{literals: string[]}[]|null} One entry per alternative, or null
 *   when any alternative has no literal of at least 3 characters.
 */
function extractRegexLiterals(pattern) {
  const perAlternative = splitTopLevelAlternation(pattern).map((alternative) => ({
    literals: extractSequenceLiterals(alternative).filter((lit) => lit.length >= 3)
  }));
  const everyBranchUsable = perAlternative.every((entry) => entry.literals.length > 0);
  return everyBranchUsable ? perAlternative : null;
}
|
|
11367
|
+
/**
 * Splits a regex source on `|` characters that are outside any group and
 * outside any character class. Escaped characters are copied through verbatim.
 *
 * @param {string} pattern - Regular expression source.
 * @returns {string[]} The alternatives in order; always at least one element.
 */
function splitTopLevelAlternation(pattern) {
  const parts = [];
  let buffer = "";
  let parenDepth = 0;
  let insideClass = false;
  let pos = 0;
  while (pos < pattern.length) {
    const c = pattern[pos];
    if (c === "\\") {
      // Keep the escape and the escaped character together.
      buffer += c + (pattern[pos + 1] ?? "");
      pos += 2;
      continue;
    }
    pos += 1;
    if (insideClass) {
      insideClass = c !== "]";
      buffer += c;
      continue;
    }
    if (c === "|" && parenDepth === 0) {
      parts.push(buffer);
      buffer = "";
      continue;
    }
    if (c === "[") {
      insideClass = true;
    } else if (c === "(") {
      parenDepth += 1;
    } else if (c === ")") {
      parenDepth -= 1;
    }
    buffer += c;
  }
  parts.push(buffer);
  return parts;
}
|
|
11404
|
+
/**
 * Walks one regex alternative and returns the runs of literal characters
 * that appear in it, lowercased. A run ends at any non-literal construct
 * (escape class, group, character class, `.`, `^`, `$`) and at optional or
 * repeated characters. Literals from a non-optional, alternation-free group
 * are included when they are 3+ characters long.
 *
 * @param {string} seq - Regex source without top-level alternation.
 * @returns {string[]} Lowercased literal runs in order of appearance.
 */
function extractSequenceLiterals(seq) {
  const found = [];
  let pending = "";

  // Ends the current run, recording it when non-empty.
  const commit = () => {
    if (pending.length > 0)
      found.push(pending.toLowerCase());
    pending = "";
  };

  // Adds one literal character, honouring the quantifier that follows it.
  const takeChar = (char, quant) => {
    if (quant.optional) {
      commit();
      return;
    }
    pending += char;
    if (quant.repeats)
      commit();
  };

  // Length of an escape running to `closer`; 2 when the closer is missing.
  const spanUntil = (closer, from) => {
    const close = seq.indexOf(closer, from + 2);
    return close === -1 ? 2 : close - from + 1;
  };

  // Number of source characters consumed by an alphanumeric escape at `at`.
  const escapeLength = (at, letter) => {
    if (letter === "x")
      return 4;
    if (letter === "u")
      return seq[at + 2] === "{" ? spanUntil("}", at) : 6;
    if (letter === "c")
      return 3;
    if (letter === "k" && seq[at + 2] === "<")
      return spanUntil(">", at);
    if ((letter === "p" || letter === "P") && seq[at + 2] === "{")
      return spanUntil("}", at);
    return 2;
  };

  let pos = 0;
  while (pos < seq.length) {
    const ch = seq[pos];
    if (ch === "\\") {
      const escaped = seq[pos + 1];
      if (escaped === undefined)
        break;
      if (/[a-zA-Z0-9]/.test(escaped)) {
        // Class/backreference/code-point escapes are never treated as literals.
        commit();
        pos += escapeLength(pos, escaped);
      } else {
        // Escaped punctuation is a literal character.
        const quant = quantifierAt(seq, pos + 2);
        takeChar(escaped, quant);
        pos += 2 + quant.length;
      }
    } else if (ch === "(") {
      const close = findGroupEnd(seq, pos);
      const inner = seq.slice(pos + 1, close);
      const quant = quantifierAt(seq, close + 1);
      commit();
      const nonCapturing = inner.startsWith("?:");
      const lookbehind = inner.startsWith("?<=") || inner.startsWith("?<!");
      const named = inner.startsWith("?<") && !lookbehind;
      const special = inner.startsWith("?") && !nonCapturing && !inner.startsWith("?<");
      let body = inner;
      if (nonCapturing)
        body = inner.slice(2);
      else if (named)
        body = inner.replace(/^\?<[^>]*>/, "");
      const usable = !special && !quant.optional && !lookbehind;
      // Groups with internal alternation contribute nothing.
      if (usable && splitTopLevelAlternation(body).length === 1) {
        found.push(...extractSequenceLiterals(body).filter((lit) => lit.length >= 3));
      }
      pos = close + 1 + quant.length;
    } else if (ch === "[") {
      const close = findClassEnd(seq, pos);
      const quant = quantifierAt(seq, close + 1);
      commit();
      pos = close + 1 + quant.length;
    } else if (ch === "." || ch === "^" || ch === "$") {
      commit();
      pos += 1;
    } else if (ch === "*" || ch === "?" || ch === "+" || ch === "{") {
      // Quantifier not consumed with its operand (e.g. after an escape class).
      pos += Math.max(1, quantifierAt(seq, pos).length);
    } else {
      const quant = quantifierAt(seq, pos + 1);
      takeChar(ch, quant);
      pos += 1 + quant.length;
    }
  }
  commit();
  return found;
}
|
|
11501
|
+
/**
 * Describes the quantifier (if any) that starts at `pos` in a regex source.
 *
 * @param {string} seq - Regex source.
 * @param {number} pos - Index to inspect.
 * @returns {{length: number, optional: boolean, repeats: boolean}} `length` is
 *   the number of characters the quantifier occupies (0 when there is none);
 *   `optional` is true when the operand may match zero times.
 */
function quantifierAt(seq, pos) {
  switch (seq[pos]) {
    case "?":
      return { length: 1, optional: true, repeats: false };
    case "*":
      return { length: 1, optional: true, repeats: true };
    case "+":
      return { length: 1, optional: false, repeats: true };
    case "{": {
      const close = seq.indexOf("}", pos);
      if (close === -1)
        break;
      // The leading integer is the minimum count; a missing one parses as NaN.
      const minCount = parseInt(seq.slice(pos + 1, close), 10);
      const lazySuffix = seq[close + 1] === "?" ? 1 : 0;
      return {
        length: close - pos + 1 + lazySuffix,
        optional: !(minCount >= 1),
        repeats: true
      };
    }
  }
  return { length: 0, optional: false, repeats: false };
}
|
|
11520
|
+
/**
 * Finds the index of the `)` that closes the group opening at `start`.
 * Escaped characters and parentheses inside character classes are ignored.
 *
 * @param {string} seq - Regex source.
 * @param {number} start - Index of the opening `(`.
 * @returns {number} Index of the matching `)`, or `seq.length - 1` when unbalanced.
 */
function findGroupEnd(seq, start) {
  let open = 0;
  let bracketed = false;
  let idx = start;
  while (idx < seq.length) {
    const c = seq[idx];
    if (c === "\\") {
      // Skip the escape and the character it protects.
      idx += 2;
      continue;
    }
    if (bracketed) {
      if (c === "]")
        bracketed = false;
    } else if (c === "[") {
      bracketed = true;
    } else if (c === "(") {
      open += 1;
    } else if (c === ")") {
      open -= 1;
      if (open === 0)
        return idx;
    }
    idx += 1;
  }
  return seq.length - 1;
}
|
|
11546
|
+
/**
 * Finds the index of the `]` that closes the character class opening at
 * `start`. A `]` directly after the opening bracket is not treated as the
 * closer, and escaped characters are skipped.
 *
 * @param {string} seq - Regex source.
 * @param {number} start - Index of the opening `[`.
 * @returns {number} Index of the closing `]`, or `seq.length - 1` when none is found.
 */
function findClassEnd(seq, start) {
  const firstMember = start + 1;
  let idx = firstMember;
  while (idx < seq.length) {
    const c = seq[idx];
    if (c === "\\") {
      idx += 2;
    } else if (c === "]" && idx > firstMember) {
      return idx;
    } else {
      idx += 1;
    }
  }
  return seq.length - 1;
}
|
|
11558
|
+
/**
 * Builds an FTS MATCH expression that pre-filters candidates for a regex:
 * literals within one alternative are AND-ed, alternatives are OR-ed.
 *
 * @param {string} pattern - Regular expression source.
 * @returns {string|null} The MATCH expression, or null when any alternative
 *   lacks a usable literal of 3+ characters (no safe pre-filter exists).
 */
function buildFtsQueryFromRegex(pattern) {
  const branches = extractRegexLiterals(pattern);
  if (!branches)
    return null;

  const conjunctions = branches.map((branch) =>
    branch.literals
      // Control characters are stripped before quoting.
      .map((lit) => lit.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, ""))
      // Stripping may shorten a literal below the 3-character minimum.
      .filter((lit) => lit.length >= 3)
      .map((lit) => `"${lit.replace(/"/g, '""')}"`)
      .join(" AND ")
  );

  // Check for emptiness BEFORE wrapping in parentheses: "()" is non-empty
  // text but an invalid MATCH expression.
  if (conjunctions.some((expr) => expr.length === 0))
    return null;

  const wrapped = branches.length > 1 ? conjunctions.map((expr) => `(${expr})`) : conjunctions;
  return wrapped.join(" OR ");
}
|
|
11570
|
+
/**
 * Compiles a user-supplied pattern for matching paths or lines.
 *
 * @param {string} pattern - Regular expression source.
 * @param {boolean} [caseSensitive=false] - When falsy, the `i` flag is applied.
 * @returns {RegExp} The compiled expression (no global/sticky flag).
 * @throws {SyntaxError} When `pattern` is not a valid regular expression.
 */
function compileSearchRegex(pattern, caseSensitive = false) {
  const flags = caseSensitive ? "" : "i";
  return new RegExp(pattern, flags);
}
|
|
11573
|
+
|
|
11574
|
+
// src/lib/local/query.ts
|
|
11575
|
+
// Maximum number of characters kept from a matched line's trimmed text.
var MAX_LINE_LENGTH = 200;
// Maximum number of line matches reported for a single file.
var MAX_MATCHES_PER_FILE = 5;
|
|
11577
|
+
/**
 * Splits a query into whitespace-separated tokens after removing control
 * characters (tab, newline and carriage return are kept as separators).
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Non-empty tokens in order.
 */
function tokenize(query) {
  const withoutControlChars = query.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "");
  const pieces = withoutControlChars.split(/\s+/);
  return pieces.filter((piece) => piece.length > 0);
}
|
|
11580
|
+
/**
 * Turns a plain query into an FTS MATCH expression: every token of 3+
 * characters becomes a double-quoted term, and terms are AND-ed together.
 *
 * @param {string} query - Raw user query.
 * @returns {string|null} The MATCH expression, or null when no token is long enough.
 */
function buildFtsQuery(query) {
  const quotedTerms = [];
  for (const term of tokenize(query)) {
    if (term.length < 3)
      continue;
    // Embedded double quotes are escaped by doubling.
    quotedTerms.push(`"${term.replace(/"/g, '""')}"`);
  }
  return quotedTerms.length === 0 ? null : quotedTerms.join(" AND ");
}
|
|
11586
|
+
/**
 * Normalises a requested result limit to an integer in [1, 500].
 *
 * @param {number} [limit] - Requested limit.
 * @param {number} [fallback=20] - Returned as-is when `limit` is undefined or not a finite number.
 * @returns {number} The clamped limit, or `fallback`.
 */
function clampLimit(limit, fallback = 20) {
  const usable = limit !== undefined && Number.isFinite(limit);
  if (!usable)
    return fallback;
  const whole = Math.floor(limit);
  if (whole < 1)
    return 1;
  if (whole > 500)
    return 500;
  return whole;
}
|
|
11591
|
+
/**
 * Builds the optional SQL filter suffix and its bound parameters from the
 * `root`, `ext` and `dir` search options.
 *
 * @param {object} opts - Search options; reads `root`, `ext`, `dir`.
 * @param {object} db - Database handle passed through to `getRoot`.
 * @returns {{sql: string, params: Array}} `sql` is empty or starts with ` AND `;
 *   `params` are in the same order as the placeholders.
 * @throws {Error} When `opts.root` does not resolve to an index root.
 */
function filterClauses(opts, db) {
  const conditions = [];
  const bindings = [];
  const add = (condition, value) => {
    conditions.push(condition);
    bindings.push(value);
  };

  if (opts.root) {
    const resolved = getRoot(opts.root, db);
    if (!resolved)
      throw new Error(`Index root not found: ${opts.root}`);
    add("r.id = ?", resolved.id);
  }

  if (opts.ext) {
    // Accept both ".ts" and "ts"; the column is compared in lower case.
    add("f.ext = ?", opts.ext.replace(/^\./, "").toLowerCase());
  }

  if (opts.dir) {
    // Strip one leading/trailing slash, then escape LIKE wildcards.
    const escaped = opts.dir.replace(/^\/|\/$/g, "").replace(/[\\%_]/g, "\\$&");
    add("f.dir LIKE ? ESCAPE '\\'", `%${escaped}%`);
  }

  const sql = conditions.length > 0 ? ` AND ${conditions.join(" AND ")}` : "";
  return { sql, params: bindings };
}
|
|
11612
|
+
/**
 * Converts a candidate database row (snake_case columns) into a search hit.
 *
 * @param {object} row - Row selected with the candidate column list.
 * @param {number} score - Relevance score to attach.
 * @returns {object} Hit with camelCase fields; `absPath` is `root_path/rel_path`
 *   and `isBinary` is true only when `is_binary` is exactly 1.
 */
function rowToHit(row, score) {
  const {
    root_id: rootId,
    root_name: rootName,
    root_path: rootPath,
    rel_path: relPath,
    name,
    ext,
    dir,
    size,
    mtime_ms: mtimeMs,
    is_binary: binaryFlag
  } = row;
  return {
    rootId,
    rootName,
    rootPath,
    relPath,
    absPath: `${rootPath}/${relPath}`,
    name,
    ext,
    dir,
    size,
    mtimeMs,
    isBinary: binaryFlag === 1,
    score
  };
}
|
|
11628
|
+
/**
 * Normalises a name for loose comparison: lower-cases it, turns each run of
 * `-`, `_` or `.` into a single space, and trims the ends.
 *
 * @param {string} s - Text to normalise.
 * @returns {string} Normalised text.
 */
function normalizeForMatch(s) {
  const lowered = s.toLowerCase();
  const spaced = lowered.replace(/[-_.]+/g, " ");
  return spaced.trim();
}
|
|
11631
|
+
// Minimum score given by scoreFileName when the query equals the file name or stem.
var EXACT_NAME_FLOOR = 0.72;
// Minimum score given by scoreFileName when the file name or stem starts with the query.
var PREFIX_NAME_FLOOR = 0.58;
// Upper bound applied to content-match scores in searchFileContent.
var CONTENT_MAX_SCORE = 0.65;
|
|
11634
|
+
/**
 * Scores how well a file's name and path match a query, in [0, 1).
 *
 * Points: exact name/stem match 100, prefix match 60, substring match 40;
 * +15 per token found in the name (else +5 if found in the path); -2 per
 * directory level; +10 if modified within 7 days (else +5 within 30 days).
 * Points are squashed with p / (p + 60). Exact and prefix matches are
 * guaranteed a minimum score.
 *
 * @param {string} query - Raw user query.
 * @param {string[]} tokens - Tokens of the query.
 * @param {object} row - Candidate row; reads `name`, `rel_path`, `mtime_ms`.
 * @returns {number} Relevance score.
 */
function scoreFileName(query, tokens, row) {
  const stripExtension = (value) => value.replace(/\.[^.]+$/, "");
  const needle = query.trim().toLowerCase();
  const needleNorm = normalizeForMatch(query);
  const fileName = row.name.toLowerCase();
  const baseName = stripExtension(fileName);
  const fileNameNorm = normalizeForMatch(row.name);
  const baseNameNorm = normalizeForMatch(stripExtension(row.name));
  const pathLower = row.rel_path.toLowerCase();

  const isExact = fileName === needle || baseName === needle || fileNameNorm === needleNorm || baseNameNorm === needleNorm;
  const isPrefix = fileName.startsWith(needle) || baseName.startsWith(needle) || baseNameNorm.startsWith(needleNorm);
  const isSubstring = fileName.includes(needle) || fileNameNorm.includes(needleNorm);

  let points = 0;
  let minimum = 0;
  if (isExact) {
    points = 100;
    minimum = EXACT_NAME_FLOOR;
  } else if (isPrefix) {
    points = 60;
    minimum = PREFIX_NAME_FLOOR;
  } else if (isSubstring) {
    points = 40;
  }

  points += tokens.reduce((sum, token) => {
    const lowered = token.toLowerCase();
    if (fileName.includes(lowered))
      return sum + 15;
    return pathLower.includes(lowered) ? sum + 5 : sum;
  }, 0);

  // Shallower files rank higher.
  const nestingLevel = row.rel_path.split("/").length - 1;
  points -= nestingLevel * 2;

  // Recently modified files get a small boost.
  const DAY_MS = 86400000;
  const ageMs = Date.now() - row.mtime_ms;
  if (ageMs < 7 * DAY_MS) {
    points += 10;
  } else if (ageMs < 30 * DAY_MS) {
    points += 5;
  }

  const bounded = Math.max(0, points);
  return Math.max(minimum, bounded / (bounded + 60));
}
|
|
11669
|
+
var CANDIDATE_COLUMNS = `
|
|
11670
|
+
f.id, f.root_id, r.name as root_name, r.path as root_path,
|
|
11671
|
+
f.rel_path, f.name, f.ext, f.dir, f.size, f.mtime_ms, f.is_binary
|
|
11672
|
+
`;
|
|
11673
|
+
/**
 * Finds indexed files whose name or path matches a plain-text query.
 *
 * Candidates come from the `files_fts` index plus a name-prefix LIKE query
 * when the query has a token of 3+ characters; otherwise from a LIKE scan of
 * `rel_path`. Candidates are scored with `scoreFileName`, sorted best-first,
 * and entries whose file no longer exists on disk are dropped.
 *
 * @param {string} query - Raw user query.
 * @param {object} [opts] - Reads `limit`, `root`, `ext`, `dir`.
 * @param {object} [db] - Database handle; defaults to `getIndexDb()`.
 * @returns {object[]} Up to `limit` hits, best score first.
 * @throws {Error} When `opts.root` does not resolve (from `filterClauses`).
 */
function searchFilePaths(query, opts = {}, db) {
  const d = db ?? getIndexDb();
  const limit = clampLimit(opts.limit);
  const tokens = tokenize(query);
  if (tokens.length === 0)
    return [];
  const ftsQuery = buildFtsQuery(query);
  const filters = filterClauses(opts, d);
  // Over-fetch so re-scoring and the on-disk check still leave enough hits.
  const candidateLimit = Math.max(200, limit * 10);
  let rows;
  if (ftsQuery) {
    rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
FROM files_fts fts
JOIN files f ON f.id = fts.rowid
JOIN index_roots r ON r.id = f.root_id
WHERE files_fts MATCH ?${filters.sql}
ORDER BY bm25(files_fts, 10.0, 1.0)
LIMIT ?`).all(ftsQuery, ...filters.params, candidateLimit);
    // Also fetch name-prefix matches and merge the ones not already present.
    const namePattern = `${query.trim().replace(/[\\%_]/g, "\\$&")}%`;
    const nameRows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
FROM files f
JOIN index_roots r ON r.id = f.root_id
WHERE f.name LIKE ? ESCAPE '\\'${filters.sql}
ORDER BY length(f.name)
LIMIT 100`).all(namePattern, ...filters.params);
    const seen = new Set(rows.map((row) => row.id));
    for (const row of nameRows) {
      if (!seen.has(row.id))
        rows.push(row);
    }
  } else {
    // Every token is shorter than 3 characters: fall back to substring matching.
    const likeClauses = tokens.map(() => "f.rel_path LIKE ? ESCAPE '\\'").join(" AND ");
    const likeParams = tokens.map((t) => `%${t.replace(/[\\%_]/g, "\\$&")}%`);
    rows = d.prepare(`SELECT ${CANDIDATE_COLUMNS}
FROM files f
JOIN index_roots r ON r.id = f.root_id
WHERE ${likeClauses}${filters.sql}
LIMIT ?`).all(...likeParams, ...filters.params, candidateLimit);
  }

  // Short tokens are excluded from the FTS query; enforce them on the path here.
  const shortTokens = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
  const filtered = shortTokens.length > 0
    ? rows.filter((row) => shortTokens.every((t) => row.rel_path.toLowerCase().includes(t)))
    : rows;

  const ranked = filtered
    .map((row) => rowToHit(row, scoreFileName(query, tokens, row)))
    .sort((a, b) => b.score - a.score);

  // Check the filesystem lazily: stop as soon as `limit` existing files are
  // found instead of stat-ing every candidate.
  const hits = [];
  for (const hit of ranked) {
    if (!existsSync3(hit.absPath))
      continue;
    hits.push(hit);
    if (hits.length >= limit)
      break;
  }
  return hits;
}
|
|
11716
|
+
/**
 * Finds the lines of a file that match a query, preferring stronger matches.
 *
 * Each line falls into at most one tier: it contains the whole query phrase,
 * else it contains every token, else it contains at least one token of 3+
 * characters. Scanning stops once enough phrase matches are collected.
 *
 * @param {string} content - File text.
 * @param {string} query - Raw user query.
 * @param {string[]} tokens - Tokens of the query.
 * @returns {{matches: {line: number, text: string}[], tier: string}} Up to
 *   MAX_MATCHES_PER_FILE matches (phrase first, then all-token, then
 *   any-token) and the best tier found: "phrase", "all" or "any".
 */
function findLineMatches(content, query, tokens) {
  const needle = query.trim().toLowerCase();
  const terms = tokens.map((t) => t.toLowerCase());
  const buckets = { phrase: [], all: [], any: [] };

  let lineNo = 0;
  for (const raw of content.split("\n")) {
    lineNo += 1;
    const haystack = raw.toLowerCase();

    let bucket = null;
    if (needle.length > 0 && haystack.includes(needle)) {
      bucket = buckets.phrase;
    } else if (terms.every((t) => haystack.includes(t))) {
      bucket = buckets.all;
    } else if (terms.some((t) => t.length >= 3 && haystack.includes(t))) {
      bucket = buckets.any;
    }
    if (bucket !== null) {
      // 1-based line number; text is trimmed and capped in length.
      bucket.push({ line: lineNo, text: raw.trim().slice(0, MAX_LINE_LENGTH) });
    }

    if (buckets.phrase.length >= MAX_MATCHES_PER_FILE)
      break;
  }

  let tier = "any";
  if (buckets.phrase.length > 0) {
    tier = "phrase";
  } else if (buckets.all.length > 0) {
    tier = "all";
  }
  const ordered = [...buckets.phrase, ...buckets.all, ...buckets.any];
  return { matches: ordered.slice(0, MAX_MATCHES_PER_FILE), tier };
}
|
|
11741
|
+
/**
 * Finds indexed files whose relative path or name matches a regular expression.
 *
 * The literals extracted from the pattern pre-filter candidates through the
 * `files_fts` index (at most 5000 rows); the regex is then applied to each one.
 *
 * @param {string} pattern - Regular expression source.
 * @param {object} [opts] - Reads `limit`, `caseSensitive`, `root`, `ext`, `dir`.
 * @param {object} [db] - Database handle; defaults to `getIndexDb()`.
 * @returns {object[]} Up to `limit` hits for files that exist on disk, in
 *   index rank order; score decreases with directory depth.
 * @throws {Error} When the pattern has no required literal of 3+ characters.
 */
function searchFilePathsRegex(pattern, opts = {}, db) {
  const database = db ?? getIndexDb();
  const maxHits = clampLimit(opts.limit);
  const matcher = compileSearchRegex(pattern, opts.caseSensitive);
  const ftsQuery = buildFtsQueryFromRegex(pattern);
  if (!ftsQuery) {
    throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'handle.*Click', not '\\w+').");
  }
  const filters = filterClauses(opts, database);
  const sql = [
    `SELECT ${CANDIDATE_COLUMNS}`,
    "FROM files_fts fts",
    "JOIN files f ON f.id = fts.rowid",
    "JOIN index_roots r ON r.id = f.root_id",
    `WHERE files_fts MATCH ?${filters.sql}`,
    "ORDER BY fts.rank",
    "LIMIT 5000"
  ].join("\n");
  const candidates = database.prepare(sql).all(ftsQuery, ...filters.params);

  const hits = [];
  for (const row of candidates) {
    const matchesPath = matcher.test(row.rel_path) || matcher.test(row.name);
    if (!matchesPath)
      continue;
    // Files nearer the root score higher, with a floor of 0.05.
    const nestingLevel = row.rel_path.split("/").length - 1;
    const hit = rowToHit(row, Math.max(0.05, 0.6 - nestingLevel * 0.02));
    if (!existsSync3(hit.absPath))
      continue;
    hits.push(hit);
    if (hits.length >= maxHits)
      break;
  }
  return hits;
}
|
|
11772
|
+
/**
 * Finds indexed files containing at least one line that matches a regular expression.
 *
 * The literals extracted from the pattern pre-filter candidates through the
 * `file_content_fts` index; each candidate is then read from disk and tested
 * line by line.
 *
 * @param {string} pattern - Regular expression source.
 * @param {object} [opts] - Reads `limit`, `caseSensitive`, `root`, `ext`, `dir`.
 * @param {object} [db] - Database handle; defaults to `getIndexDb()`.
 * @returns {object[]} Up to `limit` hits, each with `line`, `lineText` and up
 *   to MAX_MATCHES_PER_FILE `matches`. Unreadable files are skipped.
 * @throws {Error} When the pattern has no required literal of 3+ characters.
 */
function searchFileContentRegex(pattern, opts = {}, db) {
  const database = db ?? getIndexDb();
  const maxHits = clampLimit(opts.limit);
  const matcher = compileSearchRegex(pattern, opts.caseSensitive);
  const ftsQuery = buildFtsQueryFromRegex(pattern);
  if (!ftsQuery) {
    throw new Error("Regex pattern needs at least one required literal of 3+ characters (e.g. 'export.*function', not '\\d+').");
  }
  const filters = filterClauses(opts, database);
  const sql = [
    `SELECT ${CANDIDATE_COLUMNS}`,
    "FROM file_content_fts fts",
    "JOIN files f ON f.id = fts.rowid",
    "JOIN index_roots r ON r.id = f.root_id",
    `WHERE file_content_fts MATCH ?${filters.sql}`,
    "ORDER BY fts.rank",
    "LIMIT ?"
  ].join("\n");
  const candidates = database.prepare(sql).all(ftsQuery, ...filters.params, Math.max(200, maxHits * 10));

  const hits = [];
  for (const [rank, row] of candidates.entries()) {
    if (hits.length >= maxHits)
      break;

    let text;
    try {
      text = readFileSync5(`${row.root_path}/${row.rel_path}`, "utf-8");
    } catch {
      // The file may have been removed or become unreadable since indexing.
      continue;
    }

    const matches = [];
    for (const [idx, raw] of text.split("\n").entries()) {
      if (matches.length >= MAX_MATCHES_PER_FILE)
        break;
      if (matcher.test(raw)) {
        matches.push({ line: idx + 1, text: raw.trim().slice(0, MAX_LINE_LENGTH) });
      }
    }
    if (matches.length === 0)
      continue;

    // Score decays with the candidate's rank in the index, floored at 0.25.
    const score = Math.max(0.25, 0.65 - rank * 0.05);
    const [firstMatch] = matches;
    hits.push({
      ...rowToHit(row, score),
      line: firstMatch.line,
      lineText: firstMatch.text,
      matches
    });
  }
  return hits;
}
|
|
11818
|
+
/**
 * Finds indexed files whose content matches a plain-text query.
 *
 * Candidates come from the `file_content_fts` index; each one is read from
 * disk to locate matching lines. Tokens shorter than 3 characters are not in
 * the FTS query, so they are checked against the file text here.
 *
 * @param {string} query - Raw user query.
 * @param {object} [opts] - Reads `limit`, `root`, `ext`, `dir`.
 * @param {object} [db] - Database handle; defaults to `getIndexDb()`.
 * @returns {object[]} Up to `limit` hits, best score first, each with `line`,
 *   `lineText` and `matches`. Empty when the query has no token of 3+ characters.
 */
function searchFileContent(query, opts = {}, db) {
  const database = db ?? getIndexDb();
  const maxHits = clampLimit(opts.limit);
  const ftsQuery = buildFtsQuery(query);
  if (!ftsQuery)
    return [];
  const filters = filterClauses(opts, database);
  const sql = [
    `SELECT ${CANDIDATE_COLUMNS}`,
    "FROM file_content_fts fts",
    "JOIN files f ON f.id = fts.rowid",
    "JOIN index_roots r ON r.id = f.root_id",
    `WHERE file_content_fts MATCH ?${filters.sql}`,
    "ORDER BY fts.rank",
    "LIMIT ?"
  ].join("\n");
  const candidates = database.prepare(sql).all(ftsQuery, ...filters.params, Math.max(50, maxHits * 3));

  const tokens = tokenize(query);
  const shortTerms = tokens.filter((t) => t.length < 3).map((t) => t.toLowerCase());
  const collected = [];
  for (const [rank, row] of candidates.entries()) {
    // Gather up to twice the limit before the final sort.
    if (collected.length >= maxHits * 2)
      break;

    let text;
    try {
      text = readFileSync5(`${row.root_path}/${row.rel_path}`, "utf-8");
    } catch {
      // The file may have been removed or become unreadable since indexing.
      continue;
    }

    if (shortTerms.length > 0) {
      const loweredText = text.toLowerCase();
      const hasAllShortTerms = shortTerms.every((t) => loweredText.includes(t));
      if (!hasAllShortTerms)
        continue;
    }

    const { matches, tier } = findLineMatches(text, query, tokens);
    if (matches.length === 0)
      continue;

    // Rank-based base score plus a bonus for stronger line matches, capped.
    const base = Math.max(0.25, 0.55 - rank * 0.04);
    let tierBoost = 0;
    if (tier === "phrase") {
      tierBoost = 0.1;
    } else if (tier === "all") {
      tierBoost = 0.05;
    }
    const [firstMatch] = matches;
    collected.push({
      ...rowToHit(row, Math.min(CONTENT_MAX_SCORE, base + tierBoost)),
      line: firstMatch.line,
      lineText: firstMatch.text,
      matches
    });
  }
  return collected.sort((a, b) => b.score - a.score).slice(0, maxHits);
}
|
|
11864
|
+
|
|
11865
|
+
// src/lib/providers/files.ts
|
|
11866
|
+
/**
 * Search provider exposing the local file-path index as search results.
 */
class FilesProvider {
  name = "files";
  displayName = "Local Files";

  /** @returns {boolean} Result of `hasReadyRoot()`. */
  isConfigured() {
    return hasReadyRoot();
  }

  /**
   * Searches indexed file names and paths.
   *
   * @param {string} query - Raw user query.
   * @param {object} [options] - Reads `limit` (default 10).
   * @returns {Promise<object[]>} One result per file hit, with a `file://` URL.
   */
  async search(query, options) {
    autoRefreshStaleRoots();
    const limit = options?.limit ?? 10;
    const results = [];
    for (const hit of searchFilePaths(query, { limit })) {
      const { absPath, name, score, mtimeMs, rootName, relPath, dir, ext, size, isBinary } = hit;
      results.push({
        title: name,
        url: `file://${absPath}`,
        snippet: absPath,
        score,
        publishedAt: new Date(mtimeMs).toISOString(),
        metadata: { root: rootName, relPath, dir, ext, size, isBinary }
      });
    }
    return results;
  }
}
|
|
11892
|
+
|
|
11893
|
+
// src/lib/providers/content.ts
|
|
11894
|
+
/**
 * Search provider exposing the local file-content index as search results.
 */
class ContentProvider {
  name = "content";
  displayName = "Local Content";

  /** @returns {boolean} Result of `hasReadyRoot()`. */
  isConfigured() {
    return hasReadyRoot();
  }

  /**
   * Searches the text of indexed files.
   *
   * @param {string} query - Raw user query.
   * @param {object} [options] - Reads `limit` (default 10).
   * @returns {Promise<object[]>} One result per file hit; the snippet is
   *   `relPath:line: lineText` for the first matching line.
   */
  async search(query, options) {
    autoRefreshStaleRoots();
    const limit = options?.limit ?? 10;
    const results = [];
    for (const hit of searchFileContent(query, { limit })) {
      const { absPath, name, score, mtimeMs, rootName, relPath, dir, ext, line, lineText, matches } = hit;
      results.push({
        title: name,
        url: `file://${absPath}`,
        snippet: `${relPath}:${line}: ${lineText}`,
        score,
        publishedAt: new Date(mtimeMs).toISOString(),
        metadata: { root: rootName, relPath, dir, ext, line, matches }
      });
    }
    return results;
  }
}
|
|
11920
|
+
|
|
11921
|
+
// src/lib/providers/index.ts
|
|
11922
|
+
// Constructors for every known provider, keyed by provider name. Each entry
// is a thunk so a provider is only instantiated when it is first requested.
var providerFactories = {
  google: () => new GoogleProvider(),
  serpapi: () => new SerpApiProvider(),
  exa: () => new ExaProvider(),
  perplexity: () => new PerplexityProvider(),
  brave: () => new BraveProvider(),
  bing: () => new BingProvider(),
  twitter: () => new TwitterProvider(),
  reddit: () => new RedditProvider(),
  youtube: () => new YouTubeProvider(),
  hackernews: () => new HackerNewsProvider(),
  github: () => new GitHubProvider(),
  arxiv: () => new ArxivProvider(),
  files: () => new FilesProvider(),
  content: () => new ContentProvider()
};

// Provider instances already built by getProvider2, keyed by provider name.
var instanceCache = new Map();

/**
 * Return the provider registered under `name`, building it on first use and
 * reusing the cached instance afterwards.
 *
 * @param {string} name - Key of `providerFactories`.
 * @returns {object} The provider instance.
 * @throws {Error} `Unknown provider: <name>` when no factory is registered.
 */
function getProvider2(name) {
  const cached = instanceCache.get(name);
  if (cached) {
    return cached;
  }
  // Own-property check: a plain `providerFactories[name]` lookup would also
  // resolve inherited members such as "constructor" or "toString" and then
  // build and cache a bogus provider instead of rejecting the name.
  if (!Object.hasOwn(providerFactories, name)) {
    throw new Error(`Unknown provider: ${name}`);
  }
  const provider = providerFactories[name]();
  instanceCache.set(name, provider);
  return provider;
}
|
|
11950
|
+
|
|
11951
|
+
// src/lib/dedup.ts
|
|
11952
|
+
/**
 * Reduce a URL to a canonical string so equivalent links compare equal.
 *
 * - `file:` URLs are returned as given, minus trailing slashes.
 * - Other parseable URLs get a lower-cased host, a path without trailing
 *   slashes, sorted query parameters with the tracking parameters listed
 *   below removed, and no fragment.
 * - Input that `new URL` rejects is lower-cased and stripped of trailing
 *   slashes.
 *
 * @param {string} url
 * @returns {string}
 */
function normalizeUrl(url) {
  const trailingSlashes = /\/+$/;
  const trackingParams = [
    "utm_source",
    "utm_medium",
    "utm_campaign",
    "utm_content",
    "utm_term",
    "ref",
    "fbclid",
    "gclid"
  ];
  try {
    const parsed = new URL(url);
    if (parsed.protocol === "file:") {
      return url.replace(trailingSlashes, "");
    }
    parsed.hostname = parsed.hostname.toLowerCase();
    const trimmedPath = parsed.pathname.replace(trailingSlashes, "");
    parsed.pathname = trimmedPath || "/";
    const orderedPairs = [...new URLSearchParams(parsed.search).entries()].sort();
    parsed.search = new URLSearchParams(orderedPairs).toString();
    for (const key of trackingParams) {
      parsed.searchParams.delete(key);
    }
    parsed.hash = "";
    return parsed.toString();
  } catch {
    return url.toLowerCase().replace(trailingSlashes, "");
  }
}
|
|
11976
|
+
/**
 * Collapse results that share a normalized URL into a single entry.
 *
 * Within each group the highest-scoring result wins (a missing score counts
 * as 0). The winner is updated in place: its metadata gains an
 * `also_found_on_<source>` flag per duplicate, and it takes over the longest
 * snippet in the group. The surviving results are sorted by descending score
 * and re-ranked starting at 1.
 *
 * @param {object[]} results - Results carrying `url`, `source`, and optionally
 *   `score`, `snippet` and `metadata`. Entries are mutated (`rank`, and for
 *   group winners `snippet` / `metadata`).
 * @returns {object[]} The deduplicated, re-ranked results.
 */
function deduplicateResults(results) {
  const byScoreDesc = (a, b) => (b.score ?? 0) - (a.score ?? 0);
  // A missing snippet counts as length 0 instead of throwing on `.length`.
  const snippetLength = (result) => result.snippet?.length ?? 0;

  const groups = new Map();
  for (const result of results) {
    const key = normalizeUrl(result.url);
    const bucket = groups.get(key);
    if (bucket) {
      bucket.push(result);
    } else {
      groups.set(key, [result]);
    }
  }

  const deduped = [];
  for (const group of groups.values()) {
    if (group.length === 1) {
      deduped.push(group[0]);
      continue;
    }
    group.sort(byScoreDesc);
    const [best, ...duplicates] = group;
    const mergedMetadata = { ...best.metadata };
    for (const dup of duplicates) {
      mergedMetadata[`also_found_on_${dup.source}`] = true;
      if (snippetLength(dup) > snippetLength(best)) {
        best.snippet = dup.snippet;
      }
    }
    best.metadata = mergedMetadata;
    deduped.push(best);
  }

  deduped.sort(byScoreDesc);
  deduped.forEach((result, index) => {
    result.rank = index + 1;
  });
  return deduped;
}
|
|
12012
|
+
|
|
12013
|
+
// src/lib/search.ts
|
|
12014
|
+
/**
 * Run `query` against a set of providers in parallel and merge their hits.
 *
 * Provider selection, in order: `opts.profile` (when that profile exists),
 * `opts.providers`, `config.defaultProviders`, then every enabled provider
 * returned by `listProviders`. Providers that are not configured are skipped;
 * when the caller asked for providers explicitly (via `opts.providers` or
 * `opts.profile`) each skipped or unknown provider is reported in `errors`.
 *
 * @param {string} query
 * @param {object} [opts]
 * @param {string[]} [opts.providers] - Provider names to query.
 * @param {string} [opts.profile] - Profile name whose providers replace `opts.providers`.
 * @param {object} [opts.options] - Per-search options, merged over `{ limit: config.defaultLimit }`.
 * @param {boolean} [opts.dedup] - Overrides `config.dedup`.
 * @param {*} [opts.db] - Passed through to the persistence helpers.
 * @returns {Promise<{search: object, results: object[], errors: {provider: string, error: string}[]}>}
 */
async function unifiedSearch(query, opts = {}) {
  const config = getConfig();
  const startTime = Date.now();
  const db = opts.db;
  let providerNames = opts.providers ?? [];
  if (opts.profile) {
    const profile = getProfileByName(opts.profile, db);
    if (profile) {
      providerNames = profile.providers;
    }
  }
  if (providerNames.length === 0) {
    if (config.defaultProviders.length > 0) {
      providerNames = config.defaultProviders;
    } else {
      const dbProviders = listProviders(db);
      providerNames = dbProviders.filter((p) => p.enabled).map((p) => p.name);
    }
  }
  const errors2 = [];
  const explicitRequest = (opts.providers?.length ?? 0) > 0 || Boolean(opts.profile);
  const activeProviders = providerNames.filter((name) => {
    try {
      if (getProvider2(name).isConfigured()) {
        return true;
      }
      if (explicitRequest) {
        errors2.push({
          provider: name,
          error: LOCAL_PROVIDER_NAMES.has(name) ? "no index roots ready \u2014 run `search index add <path>` first" : "not configured (missing API key)"
        });
      }
    } catch (err) {
      // A provider the caller named explicitly must not vanish silently when
      // it is unknown or its configuration check throws.
      if (explicitRequest) {
        errors2.push({
          provider: name,
          error: err?.message ?? "Unknown error"
        });
      }
    }
    return false;
  });
  const searchOptions = {
    limit: config.defaultLimit,
    ...opts.options
  };
  const settled = await Promise.allSettled(activeProviders.map(async (name) => {
    const provider = getProvider2(name);
    const rawResults = await provider.search(query, searchOptions);
    updateProviderLastUsed(name, db);
    return { name, results: rawResults };
  }));
  const allResults = [];
  const searchId = generateId();
  // `settled` is index-aligned with `activeProviders`, so the iteration index
  // identifies the provider behind a rejected outcome.
  settled.forEach((outcome, index) => {
    if (outcome.status !== "fulfilled") {
      errors2.push({
        provider: activeProviders[index],
        error: outcome.reason?.message ?? "Unknown error"
      });
      return;
    }
    const { name, results: rawResults } = outcome.value;
    const provider = getProvider2(name);
    rawResults.forEach((raw, position) => {
      allResults.push({
        id: generateId(),
        searchId,
        title: raw.title,
        url: raw.url,
        snippet: raw.snippet,
        source: name,
        provider: provider.displayName,
        rank: position + 1,
        score: raw.score ?? null,
        publishedAt: raw.publishedAt ?? null,
        thumbnail: raw.thumbnail ?? null,
        metadata: raw.metadata ?? {},
        createdAt: new Date().toISOString()
      });
    });
  });
  const shouldDedup = opts.dedup ?? config.dedup;
  const finalResults = shouldDedup ? deduplicateResults(allResults) : allResults;
  if (!shouldDedup) {
    finalResults.sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
    finalResults.forEach((r, i) => {
      r.rank = i + 1;
    });
  }
  const duration = Date.now() - startTime;
  if (activeProviders.length === 0) {
    // Nothing was queried, so nothing is persisted.
    return {
      search: {
        id: searchId,
        query,
        providers: [],
        profileId: null,
        resultCount: 0,
        duration,
        createdAt: new Date().toISOString()
      },
      results: finalResults,
      errors: errors2
    };
  }
  // Hits from local providers are only stored when `recordLocalResults` is set.
  const persistable = config.recordLocalResults ? finalResults : finalResults.filter((r) => !LOCAL_PROVIDER_NAMES.has(r.source));
  const search = createSearch({
    query,
    providers: activeProviders,
    resultCount: persistable.length,
    duration
  }, db);
  if (persistable.length > 0) {
    createResults(persistable.map((r) => ({
      searchId: search.id,
      title: r.title,
      url: r.url,
      snippet: r.snippet,
      source: r.source,
      provider: r.provider,
      rank: r.rank,
      score: r.score,
      publishedAt: r.publishedAt,
      thumbnail: r.thumbnail,
      metadata: r.metadata
    })), db);
  }
  updateSearchResults(search.id, persistable.length, duration, db);
  // Returned results must point at the search record that is returned with
  // them, not at the provisional id generated before the record existed.
  for (const r of finalResults) {
    r.searchId = search.id;
  }
  return {
    search: { ...search, resultCount: finalResults.length, duration },
    results: finalResults,
    errors: errors2
  };
}
|
|
12145
|
+
/**
 * Convenience wrapper: run `unifiedSearch` against exactly one provider with
 * deduplication switched off.
 *
 * @param {string} provider - Provider name.
 * @param {string} query
 * @param {object} [options] - Forwarded as `opts.options`.
 * @param {*} [db] - Forwarded as `opts.db`.
 * @returns {Promise<object>} Whatever `unifiedSearch` resolves to.
 */
async function searchSingleProvider(provider, query, options, db) {
  const request = {
    providers: [provider],
    options,
    dedup: false,
    db
  };
  return unifiedSearch(query, request);
}
|
|
12153
|
+
// src/lib/export.ts
|
|
12154
|
+
/**
 * Serialize the stored results of one search.
 *
 * @param {string} searchId - Id looked up through `getSearch`.
 * @param {string} format - `"json"`, `"csv"` or `"md"`.
 * @param {*} [db] - Passed through to `getSearch` / `listResults`.
 * @returns {string} The serialized results (at most 1000 are requested).
 * @throws {Error} When the search does not exist or the format is not supported.
 */
function exportResults(searchId, format, db) {
  const search = getSearch(searchId, db);
  if (!search) {
    throw new Error(`Search not found: ${searchId}`);
  }
  const results = listResults(searchId, { limit: 1000 }, db);
  if (format === "json") {
    return exportJson(results);
  }
  if (format === "csv") {
    return exportCsv(results);
  }
  if (format === "md") {
    return exportMarkdown(results, search.query);
  }
  throw new Error(`Unsupported format: ${format}`);
}
|
|
12170
|
+
/**
 * Render results as pretty-printed JSON.
 *
 * @param {*} results
 * @returns {string} JSON text indented by two spaces.
 */
function exportJson(results) {
  const indent = 2;
  const serialized = JSON.stringify(results, null, indent);
  return serialized;
}
|
|
12173
|
+
/**
 * Render results as CSV text with a header row.
 *
 * Columns: rank, title, url, snippet, source, provider, score, published_at.
 * Every textual column goes through `csvEscape`, so commas, quotes or line
 * breaks in any of them cannot shift the columns of a row. Missing textual
 * values and a missing score are written as empty fields.
 *
 * @param {object[]} results
 * @returns {string} Rows joined with "\n" (no trailing newline).
 */
function exportCsv(results) {
  const headers = ["rank", "title", "url", "snippet", "source", "provider", "score", "published_at"];
  // Coerce before escaping so a null/undefined field becomes an empty cell.
  const text = (value) => csvEscape(String(value ?? ""));
  const lines = [headers.join(",")];
  for (const r of results) {
    const row = [
      r.rank,
      text(r.title),
      text(r.url),
      text(r.snippet),
      text(r.source),
      text(r.provider),
      r.score ?? "",
      text(r.publishedAt)
    ];
    lines.push(row.join(","));
  }
  return lines.join("\n");
}
|
|
12192
|
+
/**
 * Render results as a Markdown document: a title with the query, a result
 * count, then one section per result (heading, source and URL, optional
 * published date and score, optional block-quoted snippet, horizontal rule).
 *
 * @param {object[]} results
 * @param {string} query
 * @returns {string} Markdown text, lines joined with "\n".
 */
function exportMarkdown(results, query) {
  const lines = [`# Search Results: ${query}`, "", `*${results.length} results*`, ""];
  for (const r of results) {
    lines.push(`## ${r.rank}. ${r.title}`, "");
    lines.push(`**Source:** ${r.provider} | **URL:** ${r.url}`);
    if (r.publishedAt) {
      lines.push(`**Published:** ${r.publishedAt}`);
    }
    // Skip undefined as well as null: `.toFixed` would throw on either.
    if (r.score !== null && r.score !== undefined) {
      lines.push(`**Score:** ${r.score.toFixed(3)}`);
    }
    lines.push("");
    if (r.snippet) {
      // Quote every line; otherwise only the first line of a multi-line
      // snippet would sit inside the blockquote.
      const quoted = r.snippet.split(/\r?\n/).map((line) => `> ${line}`).join("\n");
      lines.push(quoted, "");
    }
    lines.push("---", "");
  }
  return lines.join("\n");
}
|
|
12217
|
+
/**
 * Escape one CSV field.
 *
 * A field containing a comma, a double quote, or a line break (LF or CR) is
 * wrapped in double quotes with embedded quotes doubled; anything else is
 * returned unchanged. `null` / `undefined` become an empty field and other
 * non-string values are converted with `String()` instead of throwing.
 *
 * @param {*} value
 * @returns {string}
 */
function csvEscape(value) {
  const text = typeof value === "string" ? value : String(value ?? "");
  if (/[",\r\n]/.test(text)) {
    return `"${text.replace(/"/g, '""')}"`;
  }
  return text;
}
|
|
12224
|
+
// src/lib/local/find.ts
|
|
12225
|
+
/**
 * Search the local index by file path, file content, or both, and merge the
 * hits per absolute path.
 *
 * A path that matches both ways is reported once with kind "both"; its score
 * is the higher of the two scores plus 0.1 (capped at 1) and it carries the
 * content hit's line, snippet and matches.
 *
 * @param {string} query
 * @param {object} [opts]
 * @param {"file"|"content"|"both"} [opts.kind="both"]
 * @param {number} [opts.limit] - Passed through `clampLimit`.
 * @param {boolean} [opts.refresh] - `false` skips `autoRefreshStaleRoots`.
 * @param {boolean} [opts.regex] - Use the regex search variants.
 * @param {boolean} [opts.caseSensitive] - Forwarded to the regex variants only.
 * @param {*} [opts.root] - Forwarded to the search helpers.
 * @param {*} [opts.ext] - Forwarded to the search helpers.
 * @param {*} [opts.dir] - Forwarded to the search helpers.
 * @param {*} [db] - Passed through to the index helpers.
 * @returns {{query: string, kind: string, indexed: boolean, roots: number, total: number, results: object[]}}
 * @throws {Error} When `opts.kind` is not one of the three accepted values.
 */
function findLocal(query, opts = {}, db) {
  const kind = opts.kind ?? "both";
  if (!["file", "content", "both"].includes(kind)) {
    throw new Error(`Invalid kind "${kind}" \u2014 use file, content, or both.`);
  }
  const limit = clampLimit(opts.limit);
  const roots = listRoots(db);
  if (!hasReadyRoot(db)) {
    return { query, kind, indexed: false, roots: roots.length, total: 0, results: [] };
  }
  if (opts.refresh !== false) {
    autoRefreshStaleRoots(db);
  }

  const queryOpts = { root: opts.root, ext: opts.ext, dir: opts.dir, limit };
  const useRegex = Boolean(opts.regex);
  // Each regex call gets its own options object, extended with `caseSensitive`.
  const regexOpts = () => ({ ...queryOpts, caseSensitive: opts.caseSensitive });
  const wantsFiles = kind !== "content";
  const wantsContent = kind !== "file";
  const merged = new Map();

  if (wantsFiles) {
    const pathHits = useRegex ? searchFilePathsRegex(query, regexOpts(), db) : searchFilePaths(query, queryOpts, db);
    for (const hit of pathHits) {
      merged.set(hit.absPath, {
        path: hit.absPath,
        root: hit.rootName,
        kind: "file",
        score: hit.score,
        snippet: hit.relPath
      });
    }
  }

  if (wantsContent) {
    const contentHits = useRegex ? searchFileContentRegex(query, regexOpts(), db) : searchFileContent(query, queryOpts, db);
    for (const hit of contentHits) {
      const entry = merged.get(hit.absPath);
      if (!entry) {
        merged.set(hit.absPath, {
          path: hit.absPath,
          root: hit.rootName,
          kind: "content",
          score: hit.score,
          line: hit.line,
          snippet: hit.lineText,
          matches: hit.matches
        });
        continue;
      }
      // Matched by path and by content: boost the better score, capped at 1.
      entry.kind = "both";
      entry.score = Math.min(1, Math.max(entry.score, hit.score) + 0.1);
      entry.line = hit.line;
      entry.snippet = hit.lineText;
      entry.matches = hit.matches;
    }
  }

  const ranked = [...merged.values()];
  ranked.sort((a, b) => b.score - a.score);
  const results = ranked.slice(0, limit);
  return {
    query,
    kind,
    indexed: true,
    roots: roots.length,
    total: results.length,
    results
  };
}
|
|
9978
12289
|
export {
|
|
9979
12290
|
updateSearchResults,
|
|
9980
12291
|
updateSavedSearchLastRun,
|
|
9981
12292
|
updateProviderLastUsed,
|
|
9982
12293
|
updateProvider,
|
|
9983
|
-
|
|
12294
|
+
unifiedSearch,
|
|
12295
|
+
syncStorageChanges,
|
|
9984
12296
|
setConfigValue,
|
|
9985
12297
|
setConfig,
|
|
12298
|
+
searchSingleProvider,
|
|
9986
12299
|
searchResultsFts,
|
|
9987
|
-
|
|
12300
|
+
searchFilePaths,
|
|
12301
|
+
searchFileContent,
|
|
12302
|
+
runStorageMigrations,
|
|
9988
12303
|
resetConfig,
|
|
9989
|
-
|
|
9990
|
-
|
|
9991
|
-
|
|
12304
|
+
removeRoot,
|
|
12305
|
+
refreshStaleRoots,
|
|
12306
|
+
pushStorageChanges,
|
|
12307
|
+
pullStorageChanges,
|
|
12308
|
+
parseStorageTables,
|
|
12309
|
+
normalizeRootPath,
|
|
9992
12310
|
listSearches,
|
|
9993
12311
|
listSavedSearches,
|
|
12312
|
+
listRoots,
|
|
9994
12313
|
listResults,
|
|
9995
12314
|
listProviders,
|
|
9996
12315
|
listProfiles,
|
|
9997
12316
|
isProviderConfigured,
|
|
12317
|
+
indexRoot,
|
|
12318
|
+
indexAllRoots,
|
|
12319
|
+
hasReadyRoot,
|
|
12320
|
+
getStorageStatus,
|
|
12321
|
+
getStoragePg,
|
|
12322
|
+
getStorageDatabaseUrl,
|
|
12323
|
+
getStorageDatabaseEnvName,
|
|
12324
|
+
getStorageDatabaseEnv,
|
|
12325
|
+
getStorageConnectionString,
|
|
12326
|
+
getStorageConfig,
|
|
9998
12327
|
getSearchStats,
|
|
9999
12328
|
getSearch,
|
|
10000
12329
|
getSavedSearch,
|
|
12330
|
+
getRoot,
|
|
10001
12331
|
getResult,
|
|
10002
12332
|
getProvider,
|
|
10003
12333
|
getProfileByName,
|
|
10004
12334
|
getProfile,
|
|
12335
|
+
getIndexDbPath,
|
|
12336
|
+
getIndexDbForTesting,
|
|
12337
|
+
getIndexDb,
|
|
10005
12338
|
getDbForTesting,
|
|
10006
12339
|
getDb,
|
|
10007
|
-
getConnectionString,
|
|
10008
12340
|
getConfigValue,
|
|
10009
12341
|
getConfig,
|
|
10010
|
-
getCloudStatus,
|
|
10011
|
-
getCloudPg,
|
|
10012
|
-
getCloudConfig,
|
|
10013
12342
|
generateId,
|
|
12343
|
+
findLocal,
|
|
12344
|
+
exportResults,
|
|
10014
12345
|
enableProvider,
|
|
10015
12346
|
disableProvider,
|
|
10016
12347
|
deleteSearch,
|
|
@@ -10021,18 +12352,28 @@ export {
|
|
|
10021
12352
|
createResults,
|
|
10022
12353
|
createResult,
|
|
10023
12354
|
createProfile,
|
|
12355
|
+
closeIndexDb,
|
|
10024
12356
|
closeDb,
|
|
12357
|
+
autoRefreshStaleRoots,
|
|
10025
12358
|
applyPgMigrations,
|
|
12359
|
+
addRoot,
|
|
10026
12360
|
ValidationError,
|
|
10027
12361
|
SearchProviderNameSchema,
|
|
10028
12362
|
SearchOptionsSchema,
|
|
10029
12363
|
SearchError,
|
|
12364
|
+
STORAGE_TABLES,
|
|
12365
|
+
STORAGE_MODE_ENV,
|
|
12366
|
+
STORAGE_DATABASE_ENV,
|
|
12367
|
+
SEARCH_STORAGE_MODE_FALLBACK_ENV,
|
|
12368
|
+
SEARCH_STORAGE_MODE_ENV,
|
|
12369
|
+
SEARCH_STORAGE_FALLBACK_ENV,
|
|
12370
|
+
SEARCH_STORAGE_ENV,
|
|
10030
12371
|
ProviderError,
|
|
10031
12372
|
PgAdapterAsync,
|
|
10032
12373
|
PROVIDER_NAMES,
|
|
10033
12374
|
NotFoundError,
|
|
12375
|
+
LOCAL_PROVIDER_NAMES,
|
|
10034
12376
|
ExportFormatSchema,
|
|
10035
12377
|
EXPORT_FORMATS,
|
|
10036
|
-
DEFAULT_CONFIG
|
|
10037
|
-
CLOUD_TABLES
|
|
12378
|
+
DEFAULT_CONFIG
|
|
10038
12379
|
};
|