@o-lang/semantic-doc-search 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@o-lang/semantic-doc-search",
3
- "version": "1.0.6",
3
+ "version": "1.0.7",
4
4
  "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
5
5
  "main": "src/index.js",
6
6
  "type": "commonjs",
@@ -1,4 +1,5 @@
1
- import crypto from "crypto";
1
+ // src/embeddings/local.js
2
+ const crypto = require("crypto");
2
3
 
3
4
  /**
4
5
  * LocalEmbedding
@@ -6,7 +7,7 @@ import crypto from "crypto";
6
7
  * Each string will produce a consistent vector based on a hash.
7
8
  * Note: Not semantic, just a placeholder for testing.
8
9
  */
9
- export class LocalEmbedding {
10
+ class LocalEmbedding {
10
11
  constructor(dim = 512) {
11
12
  this.dim = dim;
12
13
  }
@@ -40,7 +41,7 @@ export class LocalEmbedding {
40
41
  * Convenience function for index.js
41
42
  * Retries local embedding generation (mostly placeholder, but keeps API compatible)
42
43
  */
43
- export async function createEmbeddingWithRetry(text, options = {}, retries = 1) {
44
+ async function createEmbeddingWithRetry(text, options = {}, retries = 1) {
44
45
  const embedder = new LocalEmbedding();
45
46
  for (let attempt = 1; attempt <= retries; attempt++) {
46
47
  try {
@@ -50,3 +51,5 @@ export async function createEmbeddingWithRetry(text, options = {}, retries = 1)
50
51
  }
51
52
  }
52
53
  }
54
+
55
+ module.exports = { LocalEmbedding, createEmbeddingWithRetry };
@@ -1,10 +1,10 @@
1
1
  // src/llm/anthropic.js
2
- import Anthropic from "@anthropic-ai/sdk";
2
+ const Anthropic = require("@anthropic-ai/sdk");
3
3
 
4
4
  /**
5
5
  * Anthropic LLM Provider
6
6
  */
7
- export default class AnthropicProvider {
7
+ class AnthropicProvider {
8
8
  constructor({ apiKey }) {
9
9
  if (!apiKey) console.warn("⚠️ WARNING: ANTHROPIC_API_KEY missing");
10
10
  this.client = new Anthropic({ apiKey });
@@ -34,3 +34,5 @@ export default class AnthropicProvider {
34
34
  return { done: true };
35
35
  }
36
36
  }
37
+
38
+ module.exports = AnthropicProvider;
package/src/llm/groq.js CHANGED
@@ -1,10 +1,11 @@
1
1
  // src/llm/groq.js
2
- // Groq LLM Provider (groq-sdk@0.5.0)
3
- // Multi-tenant safe, streaming-ready
2
+ const Groq = require("groq-sdk");
4
3
 
5
- import Groq from "groq-sdk";
6
-
7
- export default class GroqProvider {
4
+ /**
5
+ * Groq LLM Provider (groq-sdk@0.5.0)
6
+ * Multi-tenant safe, streaming-ready
7
+ */
8
+ class GroqProvider {
8
9
  /**
9
10
  * @param {Object} options
10
11
  * @param {string} options.apiKey - User-provided Groq API key
@@ -50,3 +51,5 @@ export default class GroqProvider {
50
51
  return { done: true };
51
52
  }
52
53
  }
54
+
55
+ module.exports = GroqProvider;
package/src/llm/openai.js CHANGED
@@ -1,11 +1,11 @@
1
1
  // src/llm/openai.js
2
- import OpenAI from "openai";
2
+ const OpenAI = require("openai");
3
3
 
4
4
  /**
5
5
  * OpenAI LLM Provider
6
6
  * Supports GPT-4, GPT-4o-mini, GPT-4o, and any Chat model
7
7
  */
8
- export default class OpenAIProvider {
8
+ class OpenAIProvider {
9
9
  constructor({ apiKey }) {
10
10
  if (!apiKey) console.warn("⚠️ WARNING: OPENAI_API_KEY missing");
11
11
  this.client = new OpenAI({ apiKey });
@@ -41,3 +41,5 @@ export default class OpenAIProvider {
41
41
  return { done: true };
42
42
  }
43
43
  }
44
+
45
+ module.exports = OpenAIProvider;
package/src/llm/router.js CHANGED
@@ -1,8 +1,9 @@
1
- import OpenAIProvider from "./openai.js";
2
- import GroqProvider from "./groq.js";
3
- import AnthropicProvider from "./anthropic.js";
1
+ // src/llm/router.js
2
+ const OpenAIProvider = require("./openai.js");
3
+ const GroqProvider = require("./groq.js");
4
+ const AnthropicProvider = require("./anthropic.js");
4
5
 
5
- export function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey } = {}) {
6
+ function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey } = {}) {
6
7
  switch ((provider || "").toLowerCase()) {
7
8
  case "openai":
8
9
  if (!openaiApiKey) throw new Error("Missing OpenAI API key");
@@ -20,3 +21,5 @@ export function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey
20
21
  throw new Error(`Unsupported LLM provider: ${provider}`);
21
22
  }
22
23
  }
24
+
25
+ module.exports = { createLLM };
@@ -1,8 +1,7 @@
1
1
  // src/server/streamingHandler.js
2
-
3
- import express from "express";
4
- import { initSSE, sendSSE } from "../utils/sseStream.js";
5
- import { llmRouter } from "../llm/router.js";
2
+ const express = require("express");
3
+ const { initSSE, sendSSE } = require("../utils/sseStream.js");
4
+ const { createLLM } = require("../llm/router.js");
6
5
 
7
6
  const router = express.Router();
8
7
 
@@ -18,11 +17,19 @@ router.post("/stream", async (req, res) => {
18
17
  // Initialize SSE
19
18
  initSSE(res);
20
19
 
21
- // LLM router handles streaming token-by-token
22
- await llmRouter.streamQuery(query, provider, options, (token) => {
23
- sendSSE(res, { token });
20
+ // Create LLM instance
21
+ const llm = createLLM({
22
+ provider,
23
+ openaiApiKey: options?.openaiApiKey,
24
+ groqApiKey: options?.groqApiKey,
25
+ anthropicApiKey: options?.anthropicApiKey
24
26
  });
25
27
 
28
+ // Stream the query response
29
+ await llm.stream({ prompt: query, onToken: (token) => {
30
+ sendSSE(res, { token });
31
+ }});
32
+
26
33
  // End stream
27
34
  sendSSE(res, { done: true }, true);
28
35
 
@@ -32,4 +39,4 @@ router.post("/stream", async (req, res) => {
32
39
  }
33
40
  });
34
41
 
35
- export { router as streamingHandler };
42
+ module.exports = { router: router };
@@ -1,27 +1,16 @@
1
1
  // src/utils/chunker.js
2
-
3
- /**
4
- * chunkText
5
- * Split text into overlapping chunks
6
- *
7
- * @param {string} text - the full text
8
- * @param {number} chunkSize - number of characters per chunk
9
- * @param {number} overlap - number of characters overlap between chunks
10
- * @returns string[] - array of text chunks
11
- */
12
- export function chunkText(text, chunkSize = 1000, overlap = 200) {
13
- if (!text || !text.trim()) return [];
14
-
2
+ function chunkText(text, chunkSize, overlap) {
3
+ if (!text) return [];
15
4
  const chunks = [];
16
5
  let start = 0;
17
-
6
+
18
7
  while (start < text.length) {
19
8
  const end = Math.min(start + chunkSize, text.length);
20
- const chunk = text.slice(start, end);
21
- chunks.push(chunk);
22
-
23
- start += chunkSize - overlap; // move forward with overlap
9
+ chunks.push(text.slice(start, end));
10
+ start += chunkSize - overlap;
24
11
  }
25
-
12
+
26
13
  return chunks;
27
14
  }
15
+
16
+ module.exports = { chunkText };
@@ -1,8 +1,8 @@
1
- import fs from "fs";
2
- import path from "path";
3
- import { JSDOM } from "jsdom";
4
- import pdfParse from "pdf-parse";
5
- import { readFile } from "fs/promises";
1
+ // src/utils/extractText.js
2
+ const fs = require("fs");
3
+ const path = require("path");
4
+ const { JSDOM } = require("jsdom");
5
+ const pdfParse = require("pdf-parse");
6
6
 
7
7
  /**
8
8
  * extractTextFromFile
@@ -13,7 +13,7 @@ import { readFile } from "fs/promises";
13
13
  * - .pdf
14
14
  * - .docx (minimal)
15
15
  */
16
- export async function extractTextFromFile(filePath) {
16
+ async function extractTextFromFile(filePath) {
17
17
  const ext = path.extname(filePath).toLowerCase();
18
18
 
19
19
  if (ext === ".txt" || ext === ".md") {
@@ -34,7 +34,7 @@ export async function extractTextFromFile(filePath) {
34
34
 
35
35
  if (ext === ".docx") {
36
36
  // Minimal extraction using ZIP (can be improved)
37
- const { default: StreamZip } = await import("node-stream-zip");
37
+ const StreamZip = require("node-stream-zip").default;
38
38
  const zip = new StreamZip.async({ file: filePath });
39
39
  const content = await zip.entryData("word/document.xml");
40
40
  await zip.close();
@@ -49,7 +49,7 @@ export async function extractTextFromFile(filePath) {
49
49
  * Basic keyword extraction for lexical matching
50
50
  * Returns array of lowercase words, stripped of punctuation
51
51
  */
52
- export function extractKeywords(text = "") {
52
+ function extractKeywords(text = "") {
53
53
  if (!text) return [];
54
54
  return text
55
55
  .toLowerCase()
@@ -57,3 +57,5 @@ export function extractKeywords(text = "") {
57
57
  .split(/\s+/)
58
58
  .filter(Boolean);
59
59
  }
60
+
61
+ module.exports = { extractTextFromFile, extractKeywords };
@@ -1,24 +1,15 @@
1
1
  // src/utils/highlight.js
2
-
3
- /**
4
- * highlightMatches
5
- * Wraps all occurrences of keywords in <mark> tags
6
- *
7
- * @param {string} text - original text
8
- * @param {string[]} keywords - array of keywords to highlight
9
- * @returns string - HTML-safe text with <mark> highlights
10
- */
11
- export function highlightMatches(text, keywords = []) {
12
- if (!text || !keywords || keywords.length === 0) return text;
13
-
14
- let highlighted = text;
15
-
16
- // escape special regex chars in keywords
17
- const escapedKeywords = keywords.map(k => k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
18
-
19
- const pattern = new RegExp(`\\b(${escapedKeywords.join("|")})\\b`, "gi");
20
-
21
- highlighted = highlighted.replace(pattern, "<mark>$1</mark>");
22
-
23
- return highlighted;
2
+ function highlightMatches(text, keywords) {
3
+ if (!text || !keywords || !keywords.length) return text;
4
+
5
+ let result = text;
6
+ keywords.forEach(keyword => {
7
+ if (!keyword) return;
8
+ const regex = new RegExp(`(${keyword})`, 'gi');
9
+ result = result.replace(regex, '**$1**');
10
+ });
11
+
12
+ return result;
24
13
  }
14
+
15
+ module.exports = { highlightMatches };
@@ -7,7 +7,7 @@
7
7
  * @param {number[]} b
8
8
  * @returns {number} similarity score between -1 and 1
9
9
  */
10
- export function cosine(a, b) {
10
+ function cosine(a, b) {
11
11
  if (!a || !b || a.length !== b.length) return 0;
12
12
 
13
13
  let dot = 0, magA = 0, magB = 0;
@@ -30,7 +30,7 @@ export function cosine(a, b) {
30
30
  * @param {number[]} b
31
31
  * @returns {number} Euclidean distance
32
32
  */
33
- export function euclidean(a, b) {
33
+ function euclidean(a, b) {
34
34
  if (!a || !b || a.length !== b.length) return Infinity;
35
35
 
36
36
  let sum = 0;
@@ -40,3 +40,5 @@ export function euclidean(a, b) {
40
40
 
41
41
  return Math.sqrt(sum);
42
42
  }
43
+
44
+ module.exports = { cosine, euclidean };
@@ -8,7 +8,7 @@
8
8
  * @param {string} data
9
9
  * @param {boolean} [end=false] - whether this is the final message
10
10
  */
11
- export function sendSSE(res, data, end = false) {
11
+ function sendSSE(res, data, end = false) {
12
12
  res.write(`data: ${JSON.stringify(data)}\n\n`);
13
13
  if (end) res.write("event: end\ndata: [DONE]\n\n");
14
14
  }
@@ -19,7 +19,7 @@ export function sendSSE(res, data, end = false) {
19
19
  *
20
20
  * @param {import('http').ServerResponse} res
21
21
  */
22
- export function initSSE(res) {
22
+ function initSSE(res) {
23
23
  res.writeHead(200, {
24
24
  "Content-Type": "text/event-stream",
25
25
  "Cache-Control": "no-cache",
@@ -27,3 +27,5 @@ export function initSSE(res) {
27
27
  });
28
28
  res.write("\n");
29
29
  }
30
+
31
+ module.exports = { sendSSE, initSSE };