@o-lang/semantic-doc-search 1.0.41 → 1.0.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +0,0 @@
1
- {
2
- "clean_up_tokenization_spaces": true,
3
- "cls_token": "[CLS]",
4
- "do_basic_tokenize": true,
5
- "do_lower_case": true,
6
- "mask_token": "[MASK]",
7
- "model_max_length": 512,
8
- "never_split": null,
9
- "pad_token": "[PAD]",
10
- "sep_token": "[SEP]",
11
- "strip_accents": null,
12
- "tokenize_chinese_chars": true,
13
- "tokenizer_class": "BertTokenizer",
14
- "unk_token": "[UNK]"
15
- }
package/.env.example DELETED
File without changes
package/bin/cli.js DELETED
@@ -1,58 +0,0 @@
1
- #!/usr/bin/env node
2
- const yargs = require("yargs");
3
- const { hideBin } = require("yargs/helpers");
4
- const resolver = require("../src/index.js");
5
-
6
- const argv = yargs(hideBin(process.argv))
7
- .usage("Usage: $0 <query> [options]")
8
- .option("provider", {
9
- type: "string",
10
- describe: "LLM provider: local | openai | groq | anthropic",
11
- default: "local",
12
- })
13
- .option("openai-key", { type: "string", describe: "OpenAI API key" })
14
- .option("groq-key", { type: "string", describe: "Groq API key" })
15
- .option("anthropic-key", { type: "string", describe: "Anthropic API key" })
16
- .option("model", { type: "string", describe: "LLM model to use" })
17
- .option("doc-root", { type: "string", describe: "Directory of documents" })
18
- .option("stream", { type: "boolean", describe: "Stream output if supported", default: false })
19
- .option("vector-backend", {
20
- type: "string",
21
- describe: "Vector backend to use: pgvector | memory | pinecone | redis",
22
- default: "pgvector"
23
- })
24
- .demandCommand(1, "Please provide a query")
25
- .help()
26
- .argv;
27
-
28
- // Build context for resolver
29
- const context = {
30
- query: argv._.join(" "),
31
- doc_root: argv.docRoot,
32
- stream: argv.stream,
33
- vectorBackend: argv["vector-backend"], // NEW
34
- options: {
35
- provider: argv.provider,
36
- openaiApiKey: argv["openai-key"] || process.env.OPENAI_API_KEY,
37
- groqApiKey: argv["groq-key"] || process.env.GROQ_API_KEY,
38
- anthropicApiKey: argv["anthropic-key"] || process.env.ANTHROPIC_API_KEY,
39
- model: argv.model,
40
- },
41
- onToken: token => {
42
- if (argv.stream) process.stdout.write(token);
43
- },
44
- };
45
-
46
- (async () => {
47
- try {
48
- // Pass vectorBackend in the config
49
- const result = await resolver("search", context);
50
- if (!argv.stream) {
51
- console.log("\n\n✅ Result:\n");
52
- console.log(result.text, "\n");
53
- console.log("Meta:", result.meta);
54
- }
55
- } catch (err) {
56
- console.error("\n❌ Error running search:", err);
57
- }
58
- })();
package/docs/sample1.txt DELETED
@@ -1 +0,0 @@
1
- Semantic search is a technique that improves search results by understanding the meaning of words in a query rather than relying solely on keyword matching.
@@ -1,5 +0,0 @@
1
- # Vacation Policy 2025
2
-
3
- Full-time employees accrue 20 days of paid time off annually.
4
- Part-time employees accrue leave on a pro-rata basis.
5
- New employees begin accruing vacation immediately upon hire.
package/embeddings.json DELETED
@@ -1,3 +0,0 @@
1
- {
2
- "d17dd8bfdf22150455121531195faaa533b90a80ae55ac2d8615e517ea25bf59": true
3
- }
@@ -1,36 +0,0 @@
1
- // test-doc-search-batch.js
2
- const docSearchResolver = require("./src/index.js");
3
-
4
- (async () => {
5
- try {
6
- const context = {
7
- doc_root: "./docs", // folder with .txt or .md files
8
- vectorBackend: "memory", // can also switch to "pgvector" if configured
9
- };
10
-
11
- const queries = [
12
- "Semantic search",
13
- "Vacation policy",
14
- "Employee onboarding",
15
- "Leave requests",
16
- "HR compliance"
17
- ];
18
-
19
- console.log("🔎 Running batch doc-search...");
20
-
21
- for (const query of queries) {
22
- const action = `Ask doc-search "${query}"`;
23
- const result = await docSearchResolver(action, context);
24
-
25
- console.log("\n====================================");
26
- console.log(`Query: "${query}"`);
27
- console.log("Text:\n", result.text || "(No matches found)");
28
- console.log("Meta:", result.meta);
29
- console.log("====================================");
30
- }
31
-
32
- console.log("\n✅ Batch search complete!");
33
- } catch (err) {
34
- console.error("❌ Batch doc-search test failed:", err);
35
- }
36
- })();
@@ -1,40 +0,0 @@
1
- // test-doc-search.js
2
- const resolver = require('./src/resolver');
3
-
4
- async function testDocSearch() {
5
- console.log('🧪 Testing doc-search resolver...');
6
-
7
- const fs = require('fs');
8
- const path = require('path');
9
- const testDir = './test-docs';
10
- if (!fs.existsSync(testDir)) fs.mkdirSync(testDir);
11
- fs.writeFileSync(path.join(testDir, 'policy.md'),
12
- '# Vacation Policy\nFull-time employees get 20 days PTO.\nNew hires accrue immediately.'
13
- );
14
-
15
- try {
16
- const result = await resolver('Ask doc-search "vacation policy"', {
17
- doc_root: './test-docs',
18
- topK: 3,
19
- minScore: 0
20
- });
21
-
22
- console.log('✅ SUCCESS!');
23
-
24
- if (result?.matches?.length > 0) {
25
- const fullText = result.matches.map(m => m.content).join('\n\n');
26
- console.log('📄 Retrieved text length:', fullText.length);
27
- console.log('🔍 First 100 chars:', fullText.substring(0, 100));
28
- } else {
29
- console.log('⚠️ No matches found (check minScore or embedding quality)');
30
- }
31
-
32
- fs.rmSync(testDir, { recursive: true, force: true });
33
- } catch (err) {
34
- console.error('❌ FAILED:', err);
35
- if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true });
36
- process.exit(1);
37
- }
38
- }
39
-
40
- testDocSearch();
package/test-embed.js DELETED
@@ -1,10 +0,0 @@
1
- // test-embed.js
2
- const embedder = require("./src/embeddings/local");
3
-
4
- async function test() {
5
- console.log("Model dimension:", embedder.getDimension());
6
- const vector = await embedder.embed("hello world");
7
- console.log("Embedding result:", vector?.length, vector);
8
- }
9
-
10
- test().catch(console.error);
@@ -1,32 +0,0 @@
1
- // test-single-doc.js
2
- const path = require("path");
3
- const { LocalEmbedding } = require("./embeddings/local.js");
4
- const { chunkText } = require("./utils/chunker.js");
5
- const VectorRouter = require("./adapters/vectorRouter");
6
-
7
- (async () => {
8
- const embedder = new LocalEmbedding();
9
- const docPath = path.join(process.cwd(), "docs", "sample1.txt");
10
- const fs = require("fs");
11
- const content = fs.readFileSync(docPath, "utf8");
12
-
13
- const chunks = chunkText(content, 500);
14
- console.log(`Document split into ${chunks.length} chunk(s)`);
15
-
16
- const vectorStore = VectorRouter.create({ backend: "memory", dimension: embedder.getDimension() });
17
-
18
- for (let i = 0; i < chunks.length; i++) {
19
- const vector = await embedder.embed(chunks[i]);
20
- console.log(`Chunk ${i} embedding first 5 dims:`, vector.slice(0, 5));
21
-
22
- await vectorStore.upsert({ id: `sample1:${i}`, vector, content: chunks[i], source: "file:sample1.txt" });
23
- }
24
-
25
- const query = "Semantic search";
26
- const queryVector = await embedder.embed(query);
27
-
28
- const results = await vectorStore.query(queryVector, { topK: 5 });
29
- results.forEach((r, idx) => {
30
- console.log(`Result ${idx}: score=${r.score.toFixed(3)} content=${r.content.substring(0, 50)}...`);
31
- });
32
- })();