@karmaniverous/jeeves-watcher 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js
CHANGED
|
@@ -677,15 +677,13 @@ const ENV_PATTERN = /\$\{([^}]+)\}/g;
|
|
|
677
677
|
* Replace `${VAR_NAME}` patterns in a string with `process.env.VAR_NAME`.
|
|
678
678
|
*
|
|
679
679
|
* @param value - The string to process.
|
|
680
|
-
* @returns The string with env vars
|
|
681
|
-
* @throws If a referenced env var is not set.
|
|
680
|
+
* @returns The string with resolved env vars; unresolvable expressions left untouched.
|
|
682
681
|
*/
|
|
683
682
|
function substituteString(value) {
|
|
684
683
|
return value.replace(ENV_PATTERN, (match, varName) => {
|
|
685
684
|
const envValue = process.env[varName];
|
|
686
|
-
if (envValue === undefined)
|
|
687
|
-
|
|
688
|
-
}
|
|
685
|
+
if (envValue === undefined)
|
|
686
|
+
return match;
|
|
689
687
|
return envValue;
|
|
690
688
|
});
|
|
691
689
|
}
|
|
@@ -680,15 +680,13 @@ const ENV_PATTERN = /\$\{([^}]+)\}/g;
|
|
|
680
680
|
* Replace `${VAR_NAME}` patterns in a string with `process.env.VAR_NAME`.
|
|
681
681
|
*
|
|
682
682
|
* @param value - The string to process.
|
|
683
|
-
* @returns The string with env vars
|
|
684
|
-
* @throws If a referenced env var is not set.
|
|
683
|
+
* @returns The string with resolved env vars; unresolvable expressions left untouched.
|
|
685
684
|
*/
|
|
686
685
|
function substituteString(value) {
|
|
687
686
|
return value.replace(ENV_PATTERN, (match, varName) => {
|
|
688
687
|
const envValue = process.env[varName];
|
|
689
|
-
if (envValue === undefined)
|
|
690
|
-
|
|
691
|
-
}
|
|
688
|
+
if (envValue === undefined)
|
|
689
|
+
return match;
|
|
692
690
|
return envValue;
|
|
693
691
|
});
|
|
694
692
|
}
|
package/dist/index.iife.js
CHANGED
|
@@ -657,15 +657,13 @@
|
|
|
657
657
|
* Replace `${VAR_NAME}` patterns in a string with `process.env.VAR_NAME`.
|
|
658
658
|
*
|
|
659
659
|
* @param value - The string to process.
|
|
660
|
-
* @returns The string with env vars
|
|
661
|
-
* @throws If a referenced env var is not set.
|
|
660
|
+
* @returns The string with resolved env vars; unresolvable expressions left untouched.
|
|
662
661
|
*/
|
|
663
662
|
function substituteString(value) {
|
|
664
663
|
return value.replace(ENV_PATTERN, (match, varName) => {
|
|
665
664
|
const envValue = process.env[varName];
|
|
666
|
-
if (envValue === undefined)
|
|
667
|
-
|
|
668
|
-
}
|
|
665
|
+
if (envValue === undefined)
|
|
666
|
+
return match;
|
|
669
667
|
return envValue;
|
|
670
668
|
});
|
|
671
669
|
}
|
package/dist/index.iife.min.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
!function(e,t,i,r,o,n,s,a,c,l,d,h,u,g,f,p,m,y,w,b,v){"use strict";function M(e){var t=Object.create(null);return e&&Object.keys(e).forEach((function(i){if("default"!==i){var r=Object.getOwnPropertyDescriptor(e,i);Object.defineProperty(t,i,r.get?r:{enumerable:!0,get:function(){return e[i]}})}})),t.default=e,Object.freeze(t)}var P=M(g);function S(e){if(e instanceof Error)return e;if("string"==typeof e)return new Error(e);const t=String("object"==typeof e&&null!==e&&"message"in e?e.message:e),i=new Error(t);return i.cause=e,i}function k(e){const t=e.replace(/\\/g,"/"),i=t.search(/[*?\[]/);if(-1===i)return r.resolve(e);const o=t.slice(0,i),n=o.endsWith("/")?o.slice(0,-1):r.dirname(o);return r.resolve(n)}async function*x(e){let t;try{t=(await i.readdir(e,{withFileTypes:!0})).map((e=>({name:e.name,isDirectory:e.isDirectory()})))}catch{return}for(const o of t){const t=r.resolve(e,o.name);if(o.isDirectory)yield*x(t);else try{(await i.stat(t)).isFile()&&(yield t)}catch{}}}async function z(e,t,i,r){const n=await async function(e,t=[]){const i=e.map((e=>e.replace(/\\/g,"/"))),r=t.map((e=>e.replace(/\\/g,"/"))),n=o(i,{dot:!0}),s=r.length?o(r,{dot:!0}):()=>!1,a=Array.from(new Set(e.map(k))),c=new Set;for(const e of a)for await(const t of x(e)){const e=t.replace(/\\/g,"/");s(e)||n(e)&&c.add(t)}return Array.from(c)}(e,t);for(const e of n)await i[r](e);return n.length}function j(e,t=!1){let i=e.replace(/\\/g,"/").toLowerCase();return t&&(i=i.replace(/^([a-z]):/,((e,t)=>t))),i}function C(e,t){const i=j(e,!0),o=s.createHash("sha256").update(i,"utf8").digest("hex");return r.join(t,`${o}.meta.json`)}async function F(e,t){try{const r=await i.readFile(C(e,t),"utf8");return JSON.parse(r)}catch{return null}}async function R(e,t,o){const n=C(e,t);await i.mkdir(r.dirname(n),{recursive:!0}),await i.writeFile(n,JSON.stringify(o,null,2),"utf8")}async function T(e,t){try{await i.rm(C(e,t))}catch{}}const E=["file_path","chunk_index","total_chunks","content_hash","chunk_text"];function 
I(e){const{processor:i,vectorStore:r,embeddingProvider:o,logger:s,config:a}=e,c=t({logger:!1});var l;return c.get("/status",(()=>({status:"ok",uptime:process.uptime()}))),c.post("/metadata",(l={processor:i,logger:s},async(e,t)=>{try{const{path:t,metadata:i}=e.body;return await l.processor.processMetadataUpdate(t,i),{ok:!0}}catch(e){return l.logger.error({err:S(e)},"Metadata update failed"),t.status(500).send({error:"Internal server error"})}})),c.post("/search",function(e){return async(t,i)=>{try{const{query:i,limit:r=10}=t.body,o=await e.embeddingProvider.embed([i]);return await e.vectorStore.search(o[0],r)}catch(t){return e.logger.error({err:S(t)},"Search failed"),i.status(500).send({error:"Internal server error"})}}}({embeddingProvider:o,vectorStore:r,logger:s})),c.post("/reindex",function(e){return async(t,i)=>{try{const t=await z(e.config.watch.paths,e.config.watch.ignored,e.processor,"processFile");return await i.status(200).send({ok:!0,filesIndexed:t})}catch(t){return e.logger.error({err:S(t)},"Reindex failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,processor:i,logger:s})),c.post("/rebuild-metadata",function(e){return async(t,i)=>{try{const t=e.config.metadataDir??".jeeves-metadata",r=[...E];for await(const i of e.vectorStore.scroll()){const e=i.payload,o=e.file_path;if("string"!=typeof o||0===o.length)continue;const s=n.omit(e,r);await R(o,t,s)}return await i.status(200).send({ok:!0})}catch(t){return e.logger.error({err:S(t)},"Rebuild metadata failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,vectorStore:r,logger:s})),c.post("/config-reindex",function(e){return async(t,i)=>{try{const r=t.body.scope??"rules";return(async()=>{try{if("rules"===r){const t=await z(e.config.watch.paths,e.config.watch.ignored,e.processor,"processRulesUpdate");e.logger.info({scope:r,filesProcessed:t},"Config reindex (rules) completed")}else{const t=await 
z(e.config.watch.paths,e.config.watch.ignored,e.processor,"processFile");e.logger.info({scope:r,filesProcessed:t},"Config reindex (full) completed")}}catch(t){e.logger.error({err:S(t),scope:r},"Config reindex failed")}})(),await i.status(200).send({status:"started",scope:r})}catch(t){return e.logger.error({err:S(t)},"Config reindex request failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,processor:i,logger:s})),c}const D={metadataDir:".jeeves-watcher",shutdownTimeoutMs:1e4},A={enabled:!0,debounceMs:1e3},W={host:"127.0.0.1",port:3456},N={level:"info"},q={debounceMs:300,stabilityThresholdMs:500,usePolling:!1,pollIntervalMs:1e3},_={chunkSize:1e3,chunkOverlap:200,dimensions:3072,rateLimitPerMinute:300,concurrency:5},L=c.z.object({paths:c.z.array(c.z.string()).min(1).describe('Glob patterns for files to watch (e.g., "**/*.md"). At least one required.'),ignored:c.z.array(c.z.string()).optional().describe('Glob patterns to exclude from watching (e.g., "**/node_modules/**").'),pollIntervalMs:c.z.number().optional().describe("Polling interval in milliseconds when usePolling is enabled."),usePolling:c.z.boolean().optional().describe("Use polling instead of native file system events (for network drives)."),debounceMs:c.z.number().optional().describe("Debounce delay in milliseconds for file change events."),stabilityThresholdMs:c.z.number().optional().describe("Time in milliseconds a file must remain unchanged before processing.")}),O=c.z.object({enabled:c.z.boolean().optional().describe("Enable automatic reloading when config file changes."),debounceMs:c.z.number().optional().describe("Debounce delay in milliseconds for config file change detection.")}),$=c.z.object({provider:c.z.string().default("gemini").describe('Embedding provider name (e.g., "gemini", "openai").'),model:c.z.string().default("gemini-embedding-001").describe('Embedding model identifier (e.g., "gemini-embedding-001", 
"text-embedding-3-small").'),chunkSize:c.z.number().optional().describe("Maximum chunk size in characters for text splitting."),chunkOverlap:c.z.number().optional().describe("Character overlap between consecutive chunks."),dimensions:c.z.number().optional().describe("Embedding vector dimensions (must match model output)."),apiKey:c.z.string().optional().describe("API key for embedding provider (supports ${ENV_VAR} substitution)."),rateLimitPerMinute:c.z.number().optional().describe("Maximum embedding API requests per minute (rate limiting)."),concurrency:c.z.number().optional().describe("Maximum concurrent embedding requests.")}),Q=c.z.object({url:c.z.string().describe('Qdrant server URL (e.g., "http://localhost:6333").'),collectionName:c.z.string().describe("Qdrant collection name for vector storage."),apiKey:c.z.string().optional().describe("Qdrant API key for authentication (supports ${ENV_VAR} substitution).")}),K=c.z.object({host:c.z.string().optional().describe('Host address for API server (e.g., "127.0.0.1", "0.0.0.0").'),port:c.z.number().optional().describe("Port for API server (e.g., 3456).")}),J=c.z.object({level:c.z.string().optional().describe("Logging level (trace, debug, info, warn, error, fatal)."),file:c.z.string().optional().describe("Path to log file (logs to stdout if omitted).")}),G=c.z.object({match:c.z.record(c.z.string(),c.z.unknown()).describe("JSON Schema object to match against file attributes."),set:c.z.record(c.z.string(),c.z.unknown()).describe("Metadata fields to set when match succeeds."),map:c.z.union([l.jsonMapMapSchema,c.z.string()]).optional().describe("JsonMap transformation (inline definition or named map reference).")}),U=c.z.object({watch:L.describe("File system watch configuration."),configWatch:O.optional().describe("Configuration file watch settings."),embedding:$.describe("Embedding model configuration."),vectorStore:Q.describe("Qdrant vector store configuration."),metadataDir:c.z.string().optional().describe("Directory 
for persisted metadata sidecar files."),api:K.optional().describe("API server configuration."),extractors:c.z.record(c.z.string(),c.z.unknown()).optional().describe("Extractor configurations keyed by name."),inferenceRules:c.z.array(G).optional().describe("Rules for inferring metadata from file attributes."),maps:c.z.record(c.z.string(),l.jsonMapMapSchema).optional().describe("Reusable named JsonMap transformations."),logging:J.optional().describe("Logging configuration."),shutdownTimeoutMs:c.z.number().optional().describe("Timeout in milliseconds for graceful shutdown.")}),V=/\$\{([^}]+)\}/g;function B(e){if("string"==typeof e)return function(e){return e.replace(V,((e,t)=>{const i=process.env[t];if(void 0===i)throw new Error(`Environment variable \${${t}} referenced in config is not set.`);return i}))}(e);if(Array.isArray(e))return e.map((e=>B(e)));if(null!==e&&"object"==typeof e){const t={};for(const[i,r]of Object.entries(e))t[i]=B(r);return t}return e}const H="jeeves-watcher";async function Y(e){const t=a.cosmiconfig(H),i=e?await t.load(e):await t.search();if(!i||i.isEmpty)throw new Error("No jeeves-watcher configuration found. 
Create a .jeeves-watcherrc or jeeves-watcher.config.{js,ts,json,yaml} file.");try{const e=U.parse(i.config);return B((r=e,{...D,...r,watch:{...q,...r.watch},configWatch:{...A,...r.configWatch},embedding:{..._,...r.embedding},api:{...W,...r.api},logging:{...N,...r.logging}}))}catch(e){if(e instanceof c.ZodError){const t=e.issues.map((e=>`${e.path.join(".")}: ${e.message}`)).join("; ");throw new Error(`Invalid jeeves-watcher configuration: ${t}`)}throw e}var r}function Z(e){return e||{warn(e,t){t?console.warn(e,t):console.warn(e)}}}function X(e,t){return e<=0?Promise.resolve():new Promise(((i,r)=>{const o=setTimeout((()=>{s(),i()}),e),n=()=>{s(),r(new Error("Retry sleep aborted"))},s=()=>{clearTimeout(o),t&&t.removeEventListener("abort",n)};if(t){if(t.aborted)return void n();t.addEventListener("abort",n,{once:!0})}}))}function ee(e,t,i,r=0){const o=Math.max(0,e-1),n=Math.min(i,t*2**o),s=r>0?1+Math.random()*r:1;return Math.round(n*s)}async function te(e,t){const i=Math.max(1,t.attempts);let r;for(let o=1;o<=i;o++)try{return await e(o)}catch(e){r=e;if(o>=i)break;const n=ee(o,t.baseDelayMs,t.maxDelayMs,t.jitter);t.onRetry?.({attempt:o,attempts:i,delayMs:n,error:e}),await X(n,t.signal)}throw r}const ie=new Map([["mock",function(e){return function(e){return{dimensions:e,embed:t=>Promise.resolve(t.map((t=>{const i=s.createHash("sha256").update(t,"utf8").digest(),r=[];for(let t=0;t<e;t++){const e=i[t%i.length];r.push(e/127.5-1)}return r})))}}(e.dimensions??768)}],["gemini",function(e,t){if(!e.apiKey)throw new Error("Gemini embedding provider requires config.embedding.apiKey");const i=e.dimensions??3072,r=Z(t),o=new d.GoogleGenerativeAIEmbeddings({apiKey:e.apiKey,model:e.model});return{dimensions:i,async embed(t){const n=await te((async i=>(i>1&&r.warn({attempt:i,provider:"gemini",model:e.model},"Retrying embedding 
request"),o.embedDocuments(t))),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:t,delayMs:i,error:o})=>{r.warn({attempt:t,delayMs:i,provider:"gemini",model:e.model,err:S(o)},"Embedding call failed; will retry")}});for(const e of n)if(e.length!==i)throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(i)}, got ${String(e.length)}`);return n}}}]]);function re(e,t){const i=ie.get(e.provider);if(!i)throw new Error(`Unsupported embedding provider: ${e.provider}`);return i(e,t)}function oe(e){const t=e?.level??"info";if(e?.file){const i=h.transport({target:"pino/file",options:{destination:e.file,mkdir:!0}});return h({level:t},i)}return h({level:t})}function ne(e){return s.createHash("sha256").update(e,"utf8").digest("hex")}const se="6a6f686e-6761-4c74-ad6a-656576657321";function ae(e,t){const i=void 0!==t?`${j(e)}#${String(t)}`:j(e);return u.v5(i,se)}const ce=["content","body","text","snippet","subject","description","summary","transcript"];function le(e){if(!e||"object"!=typeof e)return JSON.stringify(e);const t=e;for(const e of ce){const i=t[e];if("string"==typeof i&&i.trim())return i}return JSON.stringify(e)}async function de(e){const t=await i.readFile(e,"utf8"),{frontmatter:r,body:o}=function(e){const t=e.replace(/^\uFEFF/,"");if(!/^\s*---/.test(t))return{body:e};const i=/^---\s*\n([\s\S]*?)\n---\s*\n?([\s\S]*)$/m.exec(t);if(!i)return{body:e};const[,r,o]=i,n=f.load(r);return{frontmatter:n&&"object"==typeof n&&!Array.isArray(n)?n:void 0,body:o}}(t);return{text:o,frontmatter:r}}async function he(e){return{text:await i.readFile(e,"utf8")}}async function ue(e){const t=await i.readFile(e,"utf8"),r=P.load(t);r("script, style").remove();return{text:r("body").text().trim()||r.text().trim()}}const ge=new Map([[".md",de],[".markdown",de],[".txt",he],[".text",he],[".json",async function(e){const t=await i.readFile(e,"utf8"),r=JSON.parse(t),o=r&&"object"==typeof r&&!Array.isArray(r)?r:void 
0;return{text:le(r),json:o}}],[".pdf",async function(e){const t=await i.readFile(e),r=new Uint8Array(t),{extractText:o}=await import("unpdf"),{text:n}=await o(r);return{text:Array.isArray(n)?n.join("\n\n"):n}}],[".docx",async function(e){const t=await i.readFile(e);return{text:(await p.extractRawText({buffer:t})).value}}],[".html",ue],[".htm",ue]]);async function fe(e,t){const i=ge.get(t.toLowerCase());return i?i(e):he(e)}function pe(e,t){return"string"!=typeof e?e:e.replace(/\$\{([^}]+)\}/g,((e,i)=>{const r=n.get(t,i);return null==r?"":"string"==typeof r?r:JSON.stringify(r)}))}function me(e,t){const i={};for(const[r,o]of Object.entries(e))i[r]=pe(o,t);return i}async function ye(e,t,i,r){const o={split:(e,t)=>e.split(t),slice:(e,t,i)=>e.slice(t,i),join:(e,t)=>e.join(t),toLowerCase:e=>e.toLowerCase(),replace:(e,t,i)=>e.replace(t,i),get:(e,t)=>n.get(e,t)};let s={};const a=r??console;for(const{rule:r,validate:n}of e)if(n(t)){const e=me(r.set,t);if(s={...s,...e},r.map){let e;if("string"==typeof r.map){if(e=i?.[r.map],!e){a.warn(`Map reference "${r.map}" not found in named maps. 
Skipping map transformation.`);continue}}else e=r.map;try{const i=new l.JsonMap(e,o),r=await i.transform(t);r&&"object"==typeof r&&!Array.isArray(r)?s={...s,...r}:a.warn("JsonMap transformation did not return an object; skipping merge.")}catch(e){a.warn(`JsonMap transformation failed: ${e instanceof Error?e.message:String(e)}`)}}}return s}function we(e,t,i,o){const n=e.replace(/\\/g,"/"),s={file:{path:n,directory:r.dirname(n).replace(/\\/g,"/"),filename:r.basename(n),extension:r.extname(n),sizeBytes:t.size,modified:t.mtime.toISOString()}};return i&&(s.frontmatter=i),o&&(s.json=o),s}function be(e){const t=function(){const e=new m({allErrors:!0});return y(e),e.addKeyword({keyword:"glob",type:"string",schemaType:"string",validate:(e,t)=>o.isMatch(t,e)}),e}();return e.map(((e,i)=>({rule:e,validate:t.compile({$id:`rule-${String(i)}`,...e.match})})))}async function ve(e,t,o,n,s){const a=r.extname(e),c=await i.stat(e),l=await fe(e,a),d=we(e,c,l.frontmatter,l.json),h=await ye(t,d,n,s),u=await F(e,o);return{inferred:h,enrichment:u,metadata:{...h,...u??{}},attributes:d,extracted:l}}function Me(e,t){const i=[];for(let r=0;r<t;r++)i.push(ae(e,r));return i}function Pe(e,t=1){if(!e)return t;const i=e.total_chunks;return"number"==typeof i?i:t}class Se{config;embeddingProvider;vectorStore;compiledRules;logger;constructor(e,t,i,r,o){this.config=e,this.embeddingProvider=t,this.vectorStore=i,this.compiledRules=r,this.logger=o}async processFile(e){try{const t=r.extname(e),{metadata:i,extracted:o}=await ve(e,this.compiledRules,this.config.metadataDir,this.config.maps,this.logger);if(!o.text.trim())return void this.logger.debug({filePath:e},"Skipping empty file");const n=ne(o.text),s=ae(e,0),a=await this.vectorStore.getPayload(s);if(a&&a.content_hash===n)return void this.logger.debug({filePath:e},"Content unchanged, skipping");const c=Pe(a),l=this.config.chunkSize??1e3,d=function(e,t,i){const r=e.toLowerCase();return".md"===r||".markdown"===r?new 
w.MarkdownTextSplitter({chunkSize:t,chunkOverlap:i}):new w.RecursiveCharacterTextSplitter({chunkSize:t,chunkOverlap:i})}(t,l,this.config.chunkOverlap??200),h=await d.splitText(o.text),u=await this.embeddingProvider.embed(h),g=h.map(((t,r)=>({id:ae(e,r),vector:u[r],payload:{...i,file_path:e.replace(/\\/g,"/"),chunk_index:r,total_chunks:h.length,content_hash:n,chunk_text:t}})));if(await this.vectorStore.upsert(g),c>h.length){const t=Me(e,c).slice(h.length);await this.vectorStore.delete(t)}this.logger.info({filePath:e,chunks:h.length},"File processed successfully")}catch(t){this.logger.error({filePath:e,err:S(t)},"Failed to process file")}}async deleteFile(e){try{const t=ae(e,0),i=await this.vectorStore.getPayload(t),r=Me(e,Pe(i));await this.vectorStore.delete(r),await T(e,this.config.metadataDir),this.logger.info({filePath:e},"File deleted from index")}catch(t){this.logger.error({filePath:e,err:S(t)},"Failed to delete file")}}async processMetadataUpdate(e,t){try{const i={...await F(e,this.config.metadataDir)??{},...t};await R(e,this.config.metadataDir,i);const r=ae(e,0),o=await this.vectorStore.getPayload(r);if(!o)return null;const n=Pe(o),s=Me(e,n);return await this.vectorStore.setPayload(s,i),this.logger.info({filePath:e,chunks:n},"Metadata updated"),i}catch(t){return this.logger.error({filePath:e,err:S(t)},"Failed to update metadata"),null}}async processRulesUpdate(e){try{const t=ae(e,0),i=await this.vectorStore.getPayload(t);if(!i)return this.logger.debug({filePath:e},"File not indexed, skipping"),null;const{metadata:r}=await ve(e,this.compiledRules,this.config.metadataDir,this.config.maps,this.logger),o=Pe(i),n=Me(e,o);return await this.vectorStore.setPayload(n,r),this.logger.info({filePath:e,chunks:o},"Rules re-applied"),r}catch(t){return this.logger.error({filePath:e,err:S(t)},"Failed to re-apply rules"),null}}updateRules(e){this.compiledRules=e,this.logger.info({rules:e.length},"Inference rules updated")}}class 
ke{debounceMs;concurrency;rateLimitPerMinute;started=!1;active=0;debounceTimers=new Map;latestByKey=new Map;normalQueue=[];lowQueue=[];tokens;lastRefillMs=Date.now();drainWaiters=[];constructor(e){this.debounceMs=e.debounceMs,this.concurrency=e.concurrency,this.rateLimitPerMinute=e.rateLimitPerMinute,this.tokens=this.rateLimitPerMinute??Number.POSITIVE_INFINITY}enqueue(e,t){const i=`${e.priority}:${e.path}`;this.latestByKey.set(i,{event:e,fn:t});const r=this.debounceTimers.get(i);r&&clearTimeout(r);const o=setTimeout((()=>{this.debounceTimers.delete(i);const e=this.latestByKey.get(i);e&&(this.latestByKey.delete(i),this.push(e),this.pump())}),this.debounceMs);this.debounceTimers.set(i,o)}process(){this.started=!0,this.pump()}async drain(){this.isIdle()||await new Promise((e=>{this.drainWaiters.push(e)}))}push(e){"low"===e.event.priority?this.lowQueue.push(e):this.normalQueue.push(e)}refillTokens(e){if(void 0===this.rateLimitPerMinute)return;const t=Math.max(0,e-this.lastRefillMs)*(this.rateLimitPerMinute/6e4);this.tokens=Math.min(this.rateLimitPerMinute,this.tokens+t),this.lastRefillMs=e}takeToken(){const e=Date.now();return this.refillTokens(e),!(this.tokens<1)&&(this.tokens-=1,!0)}nextItem(){return this.normalQueue.shift()??this.lowQueue.shift()}pump(){if(this.started){for(;this.active<this.concurrency;){const e=this.nextItem();if(!e)break;if(!this.takeToken()){"low"===e.event.priority?this.lowQueue.unshift(e):this.normalQueue.unshift(e),setTimeout((()=>{this.pump()}),250);break}this.active+=1,Promise.resolve().then((()=>e.fn(e.event))).finally((()=>{this.active-=1,this.pump(),this.maybeResolveDrain()}))}this.maybeResolveDrain()}}isIdle(){return 0===this.active&&0===this.normalQueue.length&&0===this.lowQueue.length&&0===this.debounceTimers.size&&0===this.latestByKey.size}maybeResolveDrain(){if(!this.isIdle())return;const e=this.drainWaiters;this.drainWaiters=[];for(const t of e)t()}}class xe{client;collectionName;dims;log;constructor(e,t,i){this.client=new 
b.QdrantClient({url:e.url,apiKey:e.apiKey,checkCompatibility:!1}),this.collectionName=e.collectionName,this.dims=t,this.log=Z(i)}async ensureCollection(){try{const e=await this.client.getCollections();e.collections.some((e=>e.name===this.collectionName))||await this.client.createCollection(this.collectionName,{vectors:{size:this.dims,distance:"Cosine"}})}catch(e){throw new Error(`Failed to ensure collection "${this.collectionName}": ${String(e)}`)}}async upsert(e){0!==e.length&&await te((async t=>{t>1&&this.log.warn({attempt:t,operation:"qdrant.upsert",points:e.length},"Retrying Qdrant upsert"),await this.client.upsert(this.collectionName,{wait:!0,points:e.map((e=>({id:e.id,vector:e.vector,payload:e.payload})))})}),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:e,delayMs:t,error:i})=>{this.log.warn({attempt:e,delayMs:t,operation:"qdrant.upsert",err:S(i)},"Qdrant upsert failed; will retry")}})}async delete(e){0!==e.length&&await te((async t=>{t>1&&this.log.warn({attempt:t,operation:"qdrant.delete",ids:e.length},"Retrying Qdrant delete"),await this.client.delete(this.collectionName,{wait:!0,points:e})}),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:e,delayMs:t,error:i})=>{this.log.warn({attempt:e,delayMs:t,operation:"qdrant.delete",err:S(i)},"Qdrant delete failed; will retry")}})}async setPayload(e,t){0!==e.length&&await this.client.setPayload(this.collectionName,{wait:!0,points:e,payload:t})}async getPayload(e){try{const t=await this.client.retrieve(this.collectionName,{ids:[e],with_payload:!0,with_vector:!1});return 0===t.length?null:t[0].payload}catch{return null}}async search(e,t,i){return(await this.client.search(this.collectionName,{vector:e,limit:t,with_payload:!0,...i?{filter:i}:{}})).map((e=>({id:String(e.id),score:e.score,payload:e.payload})))}async*scroll(e,t=100){let i;for(;;){const r=await this.client.scroll(this.collectionName,{limit:t,with_payload:!0,with_vector:!1,...e?{filter:e}:{},...void 
0!==i?{offset:i}:{}});for(const e of r.points)yield{id:String(e.id),payload:e.payload};const o=r.next_page_offset;if(null==o)break;if("string"!=typeof o&&"number"!=typeof o)break;i=o}}}class ze{config;queue;processor;logger;watcher;constructor(e,t,i,r){this.config=e,this.queue=t,this.processor=i,this.logger=r}start(){this.watcher=v.watch(this.config.paths,{ignored:this.config.ignored,usePolling:this.config.usePolling,interval:this.config.pollIntervalMs,awaitWriteFinish:!!this.config.stabilityThresholdMs&&{stabilityThreshold:this.config.stabilityThresholdMs},ignoreInitial:!1}),this.watcher.on("add",(e=>{this.logger.debug({path:e},"File added"),this.queue.enqueue({type:"create",path:e,priority:"normal"},(()=>this.processor.processFile(e)))})),this.watcher.on("change",(e=>{this.logger.debug({path:e},"File changed"),this.queue.enqueue({type:"modify",path:e,priority:"normal"},(()=>this.processor.processFile(e)))})),this.watcher.on("unlink",(e=>{this.logger.debug({path:e},"File removed"),this.queue.enqueue({type:"delete",path:e,priority:"normal"},(()=>this.processor.deleteFile(e)))})),this.watcher.on("error",(e=>{this.logger.error({err:S(e)},"Watcher error")})),this.queue.process(),this.logger.info({paths:this.config.paths},"Filesystem watcher started")}async stop(){this.watcher&&(await this.watcher.close(),this.watcher=void 0,this.logger.info("Filesystem watcher stopped"))}}class je{options;watcher;debounce;constructor(e){this.options=e}start(){this.options.enabled&&(this.watcher=v.watch(this.options.configPath,{ignoreInitial:!0}),this.watcher.on("change",(()=>{this.debounce&&clearTimeout(this.debounce),this.debounce=setTimeout((()=>{this.options.onChange()}),this.options.debounceMs)})),this.watcher.on("error",(e=>{this.options.logger.error({err:S(e)},"Config watcher error")})),this.options.logger.info({configPath:this.options.configPath,debounceMs:this.options.debounceMs},"Config watcher started"))}async 
stop(){this.debounce&&(clearTimeout(this.debounce),this.debounce=void 0),this.watcher&&(await this.watcher.close(),this.watcher=void 0)}}const Ce={loadConfig:Y,createLogger:oe,createEmbeddingProvider:re,createVectorStoreClient:(e,t,i)=>new xe(e,t,i),compileRules:be,createDocumentProcessor:(e,t,i,r,o)=>new Se(e,t,i,r,o),createEventQueue:e=>new ke(e),createFileSystemWatcher:(e,t,i,r)=>new ze(e,t,i,r),createApiServer:I};class Fe{config;configPath;factories;logger;watcher;queue;server;processor;configWatcher;constructor(e,t,i={}){this.config=e,this.configPath=t,this.factories={...Ce,...i}}async start(){const e=this.factories.createLogger(this.config.logging);let t;this.logger=e;try{t=this.factories.createEmbeddingProvider(this.config.embedding,e)}catch(t){throw e.fatal({err:S(t)},"Failed to create embedding provider"),t}const i=this.factories.createVectorStoreClient(this.config.vectorStore,t.dimensions,e);await i.ensureCollection();const r=this.factories.compileRules(this.config.inferenceRules??[]),o={metadataDir:this.config.metadataDir??".jeeves-metadata",chunkSize:this.config.embedding.chunkSize,chunkOverlap:this.config.embedding.chunkOverlap,maps:this.config.maps},n=this.factories.createDocumentProcessor(o,t,i,r,e);this.processor=n;const s=this.factories.createEventQueue({debounceMs:this.config.watch.debounceMs??2e3,concurrency:this.config.embedding.concurrency??5,rateLimitPerMinute:this.config.embedding.rateLimitPerMinute});this.queue=s;const a=this.factories.createFileSystemWatcher(this.config.watch,s,n,e);this.watcher=a;const c=this.factories.createApiServer({processor:n,vectorStore:i,embeddingProvider:t,queue:s,config:this.config,logger:e});this.server=c,await c.listen({host:this.config.api?.host??"127.0.0.1",port:this.config.api?.port??3456}),a.start(),this.startConfigWatch(),e.info("jeeves-watcher started")}async stop(){if(await this.stopConfigWatch(),this.watcher&&await this.watcher.stop(),this.queue){const e=this.config.shutdownTimeoutMs??1e4;await 
Promise.race([this.queue.drain().then((()=>!0)),new Promise((t=>{setTimeout((()=>{t(!1)}),e)}))])||this.logger?.warn({timeoutMs:e},"Queue drain timeout hit, forcing shutdown")}this.server&&await this.server.close(),this.logger?.info("jeeves-watcher stopped")}startConfigWatch(){const e=this.logger;if(!e)return;const t=this.config.configWatch?.enabled??!0;if(!t)return;if(!this.configPath)return void e.debug("Config watch enabled, but no config path was provided");const i=this.config.configWatch?.debounceMs??1e4;this.configWatcher=new je({configPath:this.configPath,enabled:t,debounceMs:i,logger:e,onChange:async()=>this.reloadConfig()}),this.configWatcher.start()}async stopConfigWatch(){this.configWatcher&&(await this.configWatcher.stop(),this.configWatcher=void 0)}async reloadConfig(){const e=this.logger,t=this.processor;if(e&&t&&this.configPath){e.info({configPath:this.configPath},"Config change detected, reloading...");try{const i=await this.factories.loadConfig(this.configPath);this.config=i;const r=this.factories.compileRules(i.inferenceRules??[]);t.updateRules(r),e.info({configPath:this.configPath,rules:r.length},"Config reloaded")}catch(t){e.error({err:S(t)},"Failed to reload config")}}}}e.DocumentProcessor=Se,e.EventQueue=ke,e.FileSystemWatcher=ze,e.JeevesWatcher=Fe,e.VectorStoreClient=xe,e.apiConfigSchema=K,e.applyRules=ye,e.buildAttributes=we,e.compileRules=be,e.configWatchConfigSchema=O,e.contentHash=ne,e.createApiServer=I,e.createEmbeddingProvider=re,e.createLogger=oe,e.deleteMetadata=T,e.embeddingConfigSchema=$,e.extractText=fe,e.inferenceRuleSchema=G,e.jeevesWatcherConfigSchema=U,e.loadConfig=Y,e.loggingConfigSchema=J,e.metadataPath=C,e.pointId=ae,e.readMetadata=F,e.startFromConfig=async function(e){const t=await Y(e),i=new Fe(t,e);return function(e){const t=async()=>{await e(),process.exit(0)};process.on("SIGTERM",(()=>{t()})),process.on("SIGINT",(()=>{t()}))}((()=>i.stop())),await 
i.start(),i},e.vectorStoreConfigSchema=Q,e.watchConfigSchema=L,e.writeMetadata=R}(this["jeeves-watcher"]=this["jeeves-watcher"]||{},Fastify,promises,node_path,picomatch,radash,node_crypto,cosmiconfig,zod,jsonmap,googleGenai,pino,uuid,cheerio,yaml,mammoth,Ajv,addFormats,textsplitters,jsClientRest,chokidar);
|
|
1
|
+
/**
 * Minified IIFE build of jeeves-watcher. The wrapper receives its
 * dependencies as global identifiers and attaches the public API to
 * this["jeeves-watcher"].
 *
 * Wrapper parameters, in order:
 *   e = exports object            t = Fastify
 *   i = promises (readdir/stat/readFile/mkdir/writeFile/rm are called on it;
 *       presumably node:fs/promises - confirm against the build config)
 *   r = node_path                 o = picomatch
 *   n = radash (omit, get)        s = node_crypto
 *   a = cosmiconfig               c = zod
 *   l = jsonmap                   d = googleGenai
 *   h = pino                      u = uuid
 *   g = cheerio (wrapped by M into the frozen namespace P)
 *   f = yaml                      p = mammoth
 *   m = Ajv                       y = addFormats
 *   w = textsplitters             b = jsClientRest
 *   v = chokidar
 *
 * Public export -> minified name (taken from the assignments at the end):
 *   DocumentProcessor=Se  EventQueue=ke  FileSystemWatcher=ze  JeevesWatcher=Fe
 *   VectorStoreClient=xe  applyRules=ye  buildAttributes=we  compileRules=be
 *   contentHash=ne  createApiServer=I  createEmbeddingProvider=re
 *   createLogger=oe  extractText=fe  loadConfig=Y  metadataPath=C  pointId=ae
 *   readMetadata=F  writeMetadata=R  deleteMetadata=T
 *   watchConfigSchema=L  configWatchConfigSchema=O  embeddingConfigSchema=Q
 *   vectorStoreConfigSchema=$  apiConfigSchema=K  loggingConfigSchema=J
 *   inferenceRuleSchema=G  jeevesWatcherConfigSchema=U
 *   startFromConfig = inline async function (loads config, builds Fe, installs
 *   SIGTERM/SIGINT handlers that call stop() then process.exit(0), starts Fe).
 *
 * Internal helpers:
 *   M   builds a frozen namespace object with a `default` property.
 *   S   converts any thrown value into an Error (non-Error values kept as cause).
 *   k   resolves the directory prefix of a glob (text before the first * ? [ ).
 *   x   async generator that walks a directory tree and yields regular files;
 *       unreadable directories and failed stat calls are skipped silently.
 *   z   collects files matching the globs (minus ignored ones), awaits
 *       processor[method](file) one file at a time, returns the file count.
 *   j   normalises a path: forward slashes, lower case, and optionally drops
 *       the colon after a leading drive letter.
 *   E   payload keys omitted when /rebuild-metadata writes sidecar files.
 *   D A W N q _   default values merged into the parsed config by Y.
 *   V B substitute ${NAME} from process.env recursively through strings,
 *       arrays and objects; a reference with no matching variable is left
 *       unchanged.
 *   Z   returns the given logger, or a console.warn based fallback.
 *   X   sleep that rejects with "Retry sleep aborted" when the signal aborts.
 *   ee  delay = min(maxDelay, base * 2^(attempt-1)) * (1 + random * jitter).
 *   te  retries an async function up to `attempts` times and rethrows the
 *       last error.
 *   ie  embedding provider registry with the keys "mock" and "gemini".
 *   se  UUID namespace passed to uuid v5 in ae.
 *   ce le  pick the first non-blank string among well-known JSON keys, else
 *       JSON.stringify the value.
 *   de he ue ge  markdown (with frontmatter), plain text and HTML extractors
 *       plus the extension -> extractor map; unknown extensions use he.
 *   pe me  expand ${path} templates in rule `set` values from the attributes.
 *   ve  extracts text, builds attributes, applies rules and merges the stored
 *       enrichment metadata over the inferred metadata.
 *   Me Pe  chunk point ids for a file / total_chunks read from a payload.
 *   je  config file watcher with a debounced onChange callback.
 *   Ce  default factory set used by Fe; constructor overrides are merged in.
 *
 * Behaviour worth knowing before changing callers:
 *   - Se methods catch and log every error: processFile and deleteFile then
 *     resolve to undefined; processMetadataUpdate and processRulesUpdate
 *     resolve to null.
 *   - xe.getPayload resolves to null on any retrieve error, so a failed lookup
 *     looks the same as a file that was never indexed.
 *   - POST /config-reindex replies {status:"started"} without awaiting the
 *     reindex; its outcome is only logged.
 *   - ke debounces per `${priority}:${path}` key, serves the normal queue
 *     before the low queue, and refills tokens at rateLimitPerMinute/60000
 *     per millisecond; when no token is available it re-pumps after 250 ms.
 *   - Fe.reloadConfig replaces this.config and the compiled rules only; the
 *     watcher, queue, server and embedding provider are not recreated.
 *
 * NOTE(review): default values disagree between the default objects and the
 * inline fallbacks. D.metadataDir is ".jeeves-watcher" while the
 * /rebuild-metadata handler and Fe.start fall back to ".jeeves-metadata";
 * q.debounceMs is 300 while Fe.start falls back to 2e3; A.debounceMs is 1e3
 * while startConfigWatch falls back to 1e4. The fallbacks only apply to a
 * config that did not go through Y - confirm which values are intended.
 *
 * NOTE(review): this copy is hard-wrapped, and some wraps fall inside string
 * or template literals (for example the metadataDir description and the
 * "No jeeves-watcher configuration found" message). Verify against the
 * published single-line file before executing it.
 */
!function(e,t,i,r,o,n,s,a,c,l,d,h,u,g,f,p,m,y,w,b,v){"use strict";function M(e){var t=Object.create(null);return e&&Object.keys(e).forEach((function(i){if("default"!==i){var r=Object.getOwnPropertyDescriptor(e,i);Object.defineProperty(t,i,r.get?r:{enumerable:!0,get:function(){return e[i]}})}})),t.default=e,Object.freeze(t)}var P=M(g);function S(e){if(e instanceof Error)return e;if("string"==typeof e)return new Error(e);const t=String("object"==typeof e&&null!==e&&"message"in e?e.message:e),i=new Error(t);return i.cause=e,i}function k(e){const t=e.replace(/\\/g,"/"),i=t.search(/[*?\[]/);if(-1===i)return r.resolve(e);const o=t.slice(0,i),n=o.endsWith("/")?o.slice(0,-1):r.dirname(o);return r.resolve(n)}async function*x(e){let t;try{t=(await i.readdir(e,{withFileTypes:!0})).map((e=>({name:e.name,isDirectory:e.isDirectory()})))}catch{return}for(const o of t){const t=r.resolve(e,o.name);if(o.isDirectory)yield*x(t);else try{(await i.stat(t)).isFile()&&(yield t)}catch{}}}async function z(e,t,i,r){const n=await async function(e,t=[]){const i=e.map((e=>e.replace(/\\/g,"/"))),r=t.map((e=>e.replace(/\\/g,"/"))),n=o(i,{dot:!0}),s=r.length?o(r,{dot:!0}):()=>!1,a=Array.from(new Set(e.map(k))),c=new Set;for(const e of a)for await(const t of x(e)){const e=t.replace(/\\/g,"/");s(e)||n(e)&&c.add(t)}return Array.from(c)}(e,t);for(const e of n)await i[r](e);return n.length}function j(e,t=!1){let i=e.replace(/\\/g,"/").toLowerCase();return t&&(i=i.replace(/^([a-z]):/,((e,t)=>t))),i}function C(e,t){const i=j(e,!0),o=s.createHash("sha256").update(i,"utf8").digest("hex");return r.join(t,`${o}.meta.json`)}async function F(e,t){try{const r=await i.readFile(C(e,t),"utf8");return JSON.parse(r)}catch{return null}}async function R(e,t,o){const n=C(e,t);await i.mkdir(r.dirname(n),{recursive:!0}),await i.writeFile(n,JSON.stringify(o,null,2),"utf8")}async function T(e,t){try{await i.rm(C(e,t))}catch{}}const E=["file_path","chunk_index","total_chunks","content_hash","chunk_text"];function 
I(e){const{processor:i,vectorStore:r,embeddingProvider:o,logger:s,config:a}=e,c=t({logger:!1});var l;return c.get("/status",(()=>({status:"ok",uptime:process.uptime()}))),c.post("/metadata",(l={processor:i,logger:s},async(e,t)=>{try{const{path:t,metadata:i}=e.body;return await l.processor.processMetadataUpdate(t,i),{ok:!0}}catch(e){return l.logger.error({err:S(e)},"Metadata update failed"),t.status(500).send({error:"Internal server error"})}})),c.post("/search",function(e){return async(t,i)=>{try{const{query:i,limit:r=10}=t.body,o=await e.embeddingProvider.embed([i]);return await e.vectorStore.search(o[0],r)}catch(t){return e.logger.error({err:S(t)},"Search failed"),i.status(500).send({error:"Internal server error"})}}}({embeddingProvider:o,vectorStore:r,logger:s})),c.post("/reindex",function(e){return async(t,i)=>{try{const t=await z(e.config.watch.paths,e.config.watch.ignored,e.processor,"processFile");return await i.status(200).send({ok:!0,filesIndexed:t})}catch(t){return e.logger.error({err:S(t)},"Reindex failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,processor:i,logger:s})),c.post("/rebuild-metadata",function(e){return async(t,i)=>{try{const t=e.config.metadataDir??".jeeves-metadata",r=[...E];for await(const i of e.vectorStore.scroll()){const e=i.payload,o=e.file_path;if("string"!=typeof o||0===o.length)continue;const s=n.omit(e,r);await R(o,t,s)}return await i.status(200).send({ok:!0})}catch(t){return e.logger.error({err:S(t)},"Rebuild metadata failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,vectorStore:r,logger:s})),c.post("/config-reindex",function(e){return async(t,i)=>{try{const r=t.body.scope??"rules";return(async()=>{try{if("rules"===r){const t=await z(e.config.watch.paths,e.config.watch.ignored,e.processor,"processRulesUpdate");e.logger.info({scope:r,filesProcessed:t},"Config reindex (rules) completed")}else{const t=await 
z(e.config.watch.paths,e.config.watch.ignored,e.processor,"processFile");e.logger.info({scope:r,filesProcessed:t},"Config reindex (full) completed")}}catch(t){e.logger.error({err:S(t),scope:r},"Config reindex failed")}})(),await i.status(200).send({status:"started",scope:r})}catch(t){return e.logger.error({err:S(t)},"Config reindex request failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,processor:i,logger:s})),c}const D={metadataDir:".jeeves-watcher",shutdownTimeoutMs:1e4},A={enabled:!0,debounceMs:1e3},W={host:"127.0.0.1",port:3456},N={level:"info"},q={debounceMs:300,stabilityThresholdMs:500,usePolling:!1,pollIntervalMs:1e3},_={chunkSize:1e3,chunkOverlap:200,dimensions:3072,rateLimitPerMinute:300,concurrency:5},L=c.z.object({paths:c.z.array(c.z.string()).min(1).describe('Glob patterns for files to watch (e.g., "**/*.md"). At least one required.'),ignored:c.z.array(c.z.string()).optional().describe('Glob patterns to exclude from watching (e.g., "**/node_modules/**").'),pollIntervalMs:c.z.number().optional().describe("Polling interval in milliseconds when usePolling is enabled."),usePolling:c.z.boolean().optional().describe("Use polling instead of native file system events (for network drives)."),debounceMs:c.z.number().optional().describe("Debounce delay in milliseconds for file change events."),stabilityThresholdMs:c.z.number().optional().describe("Time in milliseconds a file must remain unchanged before processing.")}),O=c.z.object({enabled:c.z.boolean().optional().describe("Enable automatic reloading when config file changes."),debounceMs:c.z.number().optional().describe("Debounce delay in milliseconds for config file change detection.")}),Q=c.z.object({provider:c.z.string().default("gemini").describe('Embedding provider name (e.g., "gemini", "openai").'),model:c.z.string().default("gemini-embedding-001").describe('Embedding model identifier (e.g., "gemini-embedding-001", 
"text-embedding-3-small").'),chunkSize:c.z.number().optional().describe("Maximum chunk size in characters for text splitting."),chunkOverlap:c.z.number().optional().describe("Character overlap between consecutive chunks."),dimensions:c.z.number().optional().describe("Embedding vector dimensions (must match model output)."),apiKey:c.z.string().optional().describe("API key for embedding provider (supports ${ENV_VAR} substitution)."),rateLimitPerMinute:c.z.number().optional().describe("Maximum embedding API requests per minute (rate limiting)."),concurrency:c.z.number().optional().describe("Maximum concurrent embedding requests.")}),$=c.z.object({url:c.z.string().describe('Qdrant server URL (e.g., "http://localhost:6333").'),collectionName:c.z.string().describe("Qdrant collection name for vector storage."),apiKey:c.z.string().optional().describe("Qdrant API key for authentication (supports ${ENV_VAR} substitution).")}),K=c.z.object({host:c.z.string().optional().describe('Host address for API server (e.g., "127.0.0.1", "0.0.0.0").'),port:c.z.number().optional().describe("Port for API server (e.g., 3456).")}),J=c.z.object({level:c.z.string().optional().describe("Logging level (trace, debug, info, warn, error, fatal)."),file:c.z.string().optional().describe("Path to log file (logs to stdout if omitted).")}),G=c.z.object({match:c.z.record(c.z.string(),c.z.unknown()).describe("JSON Schema object to match against file attributes."),set:c.z.record(c.z.string(),c.z.unknown()).describe("Metadata fields to set when match succeeds."),map:c.z.union([l.jsonMapMapSchema,c.z.string()]).optional().describe("JsonMap transformation (inline definition or named map reference).")}),U=c.z.object({watch:L.describe("File system watch configuration."),configWatch:O.optional().describe("Configuration file watch settings."),embedding:Q.describe("Embedding model configuration."),vectorStore:$.describe("Qdrant vector store configuration."),metadataDir:c.z.string().optional().describe("Directory 
for persisted metadata sidecar files."),api:K.optional().describe("API server configuration."),extractors:c.z.record(c.z.string(),c.z.unknown()).optional().describe("Extractor configurations keyed by name."),inferenceRules:c.z.array(G).optional().describe("Rules for inferring metadata from file attributes."),maps:c.z.record(c.z.string(),l.jsonMapMapSchema).optional().describe("Reusable named JsonMap transformations."),logging:J.optional().describe("Logging configuration."),shutdownTimeoutMs:c.z.number().optional().describe("Timeout in milliseconds for graceful shutdown.")}),V=/\$\{([^}]+)\}/g;function B(e){if("string"==typeof e)return function(e){return e.replace(V,((e,t)=>{const i=process.env[t];return void 0===i?e:i}))}(e);if(Array.isArray(e))return e.map((e=>B(e)));if(null!==e&&"object"==typeof e){const t={};for(const[i,r]of Object.entries(e))t[i]=B(r);return t}return e}const H="jeeves-watcher";async function Y(e){const t=a.cosmiconfig(H),i=e?await t.load(e):await t.search();if(!i||i.isEmpty)throw new Error("No jeeves-watcher configuration found. 
Create a .jeeves-watcherrc or jeeves-watcher.config.{js,ts,json,yaml} file.");try{const e=U.parse(i.config);return B((r=e,{...D,...r,watch:{...q,...r.watch},configWatch:{...A,...r.configWatch},embedding:{..._,...r.embedding},api:{...W,...r.api},logging:{...N,...r.logging}}))}catch(e){if(e instanceof c.ZodError){const t=e.issues.map((e=>`${e.path.join(".")}: ${e.message}`)).join("; ");throw new Error(`Invalid jeeves-watcher configuration: ${t}`)}throw e}var r}function Z(e){return e||{warn(e,t){t?console.warn(e,t):console.warn(e)}}}function X(e,t){return e<=0?Promise.resolve():new Promise(((i,r)=>{const o=setTimeout((()=>{s(),i()}),e),n=()=>{s(),r(new Error("Retry sleep aborted"))},s=()=>{clearTimeout(o),t&&t.removeEventListener("abort",n)};if(t){if(t.aborted)return void n();t.addEventListener("abort",n,{once:!0})}}))}function ee(e,t,i,r=0){const o=Math.max(0,e-1),n=Math.min(i,t*2**o),s=r>0?1+Math.random()*r:1;return Math.round(n*s)}async function te(e,t){const i=Math.max(1,t.attempts);let r;for(let o=1;o<=i;o++)try{return await e(o)}catch(e){r=e;if(o>=i)break;const n=ee(o,t.baseDelayMs,t.maxDelayMs,t.jitter);t.onRetry?.({attempt:o,attempts:i,delayMs:n,error:e}),await X(n,t.signal)}throw r}const ie=new Map([["mock",function(e){return function(e){return{dimensions:e,embed:t=>Promise.resolve(t.map((t=>{const i=s.createHash("sha256").update(t,"utf8").digest(),r=[];for(let t=0;t<e;t++){const e=i[t%i.length];r.push(e/127.5-1)}return r})))}}(e.dimensions??768)}],["gemini",function(e,t){if(!e.apiKey)throw new Error("Gemini embedding provider requires config.embedding.apiKey");const i=e.dimensions??3072,r=Z(t),o=new d.GoogleGenerativeAIEmbeddings({apiKey:e.apiKey,model:e.model});return{dimensions:i,async embed(t){const n=await te((async i=>(i>1&&r.warn({attempt:i,provider:"gemini",model:e.model},"Retrying embedding 
request"),o.embedDocuments(t))),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:t,delayMs:i,error:o})=>{r.warn({attempt:t,delayMs:i,provider:"gemini",model:e.model,err:S(o)},"Embedding call failed; will retry")}});for(const e of n)if(e.length!==i)throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(i)}, got ${String(e.length)}`);return n}}}]]);function re(e,t){const i=ie.get(e.provider);if(!i)throw new Error(`Unsupported embedding provider: ${e.provider}`);return i(e,t)}function oe(e){const t=e?.level??"info";if(e?.file){const i=h.transport({target:"pino/file",options:{destination:e.file,mkdir:!0}});return h({level:t},i)}return h({level:t})}function ne(e){return s.createHash("sha256").update(e,"utf8").digest("hex")}const se="6a6f686e-6761-4c74-ad6a-656576657321";function ae(e,t){const i=void 0!==t?`${j(e)}#${String(t)}`:j(e);return u.v5(i,se)}const ce=["content","body","text","snippet","subject","description","summary","transcript"];function le(e){if(!e||"object"!=typeof e)return JSON.stringify(e);const t=e;for(const e of ce){const i=t[e];if("string"==typeof i&&i.trim())return i}return JSON.stringify(e)}async function de(e){const t=await i.readFile(e,"utf8"),{frontmatter:r,body:o}=function(e){const t=e.replace(/^\uFEFF/,"");if(!/^\s*---/.test(t))return{body:e};const i=/^---\s*\n([\s\S]*?)\n---\s*\n?([\s\S]*)$/m.exec(t);if(!i)return{body:e};const[,r,o]=i,n=f.load(r);return{frontmatter:n&&"object"==typeof n&&!Array.isArray(n)?n:void 0,body:o}}(t);return{text:o,frontmatter:r}}async function he(e){return{text:await i.readFile(e,"utf8")}}async function ue(e){const t=await i.readFile(e,"utf8"),r=P.load(t);r("script, style").remove();return{text:r("body").text().trim()||r.text().trim()}}const ge=new Map([[".md",de],[".markdown",de],[".txt",he],[".text",he],[".json",async function(e){const t=await i.readFile(e,"utf8"),r=JSON.parse(t),o=r&&"object"==typeof r&&!Array.isArray(r)?r:void 
0;return{text:le(r),json:o}}],[".pdf",async function(e){const t=await i.readFile(e),r=new Uint8Array(t),{extractText:o}=await import("unpdf"),{text:n}=await o(r);return{text:Array.isArray(n)?n.join("\n\n"):n}}],[".docx",async function(e){const t=await i.readFile(e);return{text:(await p.extractRawText({buffer:t})).value}}],[".html",ue],[".htm",ue]]);async function fe(e,t){const i=ge.get(t.toLowerCase());return i?i(e):he(e)}function pe(e,t){return"string"!=typeof e?e:e.replace(/\$\{([^}]+)\}/g,((e,i)=>{const r=n.get(t,i);return null==r?"":"string"==typeof r?r:JSON.stringify(r)}))}function me(e,t){const i={};for(const[r,o]of Object.entries(e))i[r]=pe(o,t);return i}async function ye(e,t,i,r){const o={split:(e,t)=>e.split(t),slice:(e,t,i)=>e.slice(t,i),join:(e,t)=>e.join(t),toLowerCase:e=>e.toLowerCase(),replace:(e,t,i)=>e.replace(t,i),get:(e,t)=>n.get(e,t)};let s={};const a=r??console;for(const{rule:r,validate:n}of e)if(n(t)){const e=me(r.set,t);if(s={...s,...e},r.map){let e;if("string"==typeof r.map){if(e=i?.[r.map],!e){a.warn(`Map reference "${r.map}" not found in named maps. 
Skipping map transformation.`);continue}}else e=r.map;try{const i=new l.JsonMap(e,o),r=await i.transform(t);r&&"object"==typeof r&&!Array.isArray(r)?s={...s,...r}:a.warn("JsonMap transformation did not return an object; skipping merge.")}catch(e){a.warn(`JsonMap transformation failed: ${e instanceof Error?e.message:String(e)}`)}}}return s}function we(e,t,i,o){const n=e.replace(/\\/g,"/"),s={file:{path:n,directory:r.dirname(n).replace(/\\/g,"/"),filename:r.basename(n),extension:r.extname(n),sizeBytes:t.size,modified:t.mtime.toISOString()}};return i&&(s.frontmatter=i),o&&(s.json=o),s}function be(e){const t=function(){const e=new m({allErrors:!0});return y(e),e.addKeyword({keyword:"glob",type:"string",schemaType:"string",validate:(e,t)=>o.isMatch(t,e)}),e}();return e.map(((e,i)=>({rule:e,validate:t.compile({$id:`rule-${String(i)}`,...e.match})})))}async function ve(e,t,o,n,s){const a=r.extname(e),c=await i.stat(e),l=await fe(e,a),d=we(e,c,l.frontmatter,l.json),h=await ye(t,d,n,s),u=await F(e,o);return{inferred:h,enrichment:u,metadata:{...h,...u??{}},attributes:d,extracted:l}}function Me(e,t){const i=[];for(let r=0;r<t;r++)i.push(ae(e,r));return i}function Pe(e,t=1){if(!e)return t;const i=e.total_chunks;return"number"==typeof i?i:t}class Se{config;embeddingProvider;vectorStore;compiledRules;logger;constructor(e,t,i,r,o){this.config=e,this.embeddingProvider=t,this.vectorStore=i,this.compiledRules=r,this.logger=o}async processFile(e){try{const t=r.extname(e),{metadata:i,extracted:o}=await ve(e,this.compiledRules,this.config.metadataDir,this.config.maps,this.logger);if(!o.text.trim())return void this.logger.debug({filePath:e},"Skipping empty file");const n=ne(o.text),s=ae(e,0),a=await this.vectorStore.getPayload(s);if(a&&a.content_hash===n)return void this.logger.debug({filePath:e},"Content unchanged, skipping");const c=Pe(a),l=this.config.chunkSize??1e3,d=function(e,t,i){const r=e.toLowerCase();return".md"===r||".markdown"===r?new 
w.MarkdownTextSplitter({chunkSize:t,chunkOverlap:i}):new w.RecursiveCharacterTextSplitter({chunkSize:t,chunkOverlap:i})}(t,l,this.config.chunkOverlap??200),h=await d.splitText(o.text),u=await this.embeddingProvider.embed(h),g=h.map(((t,r)=>({id:ae(e,r),vector:u[r],payload:{...i,file_path:e.replace(/\\/g,"/"),chunk_index:r,total_chunks:h.length,content_hash:n,chunk_text:t}})));if(await this.vectorStore.upsert(g),c>h.length){const t=Me(e,c).slice(h.length);await this.vectorStore.delete(t)}this.logger.info({filePath:e,chunks:h.length},"File processed successfully")}catch(t){this.logger.error({filePath:e,err:S(t)},"Failed to process file")}}async deleteFile(e){try{const t=ae(e,0),i=await this.vectorStore.getPayload(t),r=Me(e,Pe(i));await this.vectorStore.delete(r),await T(e,this.config.metadataDir),this.logger.info({filePath:e},"File deleted from index")}catch(t){this.logger.error({filePath:e,err:S(t)},"Failed to delete file")}}async processMetadataUpdate(e,t){try{const i={...await F(e,this.config.metadataDir)??{},...t};await R(e,this.config.metadataDir,i);const r=ae(e,0),o=await this.vectorStore.getPayload(r);if(!o)return null;const n=Pe(o),s=Me(e,n);return await this.vectorStore.setPayload(s,i),this.logger.info({filePath:e,chunks:n},"Metadata updated"),i}catch(t){return this.logger.error({filePath:e,err:S(t)},"Failed to update metadata"),null}}async processRulesUpdate(e){try{const t=ae(e,0),i=await this.vectorStore.getPayload(t);if(!i)return this.logger.debug({filePath:e},"File not indexed, skipping"),null;const{metadata:r}=await ve(e,this.compiledRules,this.config.metadataDir,this.config.maps,this.logger),o=Pe(i),n=Me(e,o);return await this.vectorStore.setPayload(n,r),this.logger.info({filePath:e,chunks:o},"Rules re-applied"),r}catch(t){return this.logger.error({filePath:e,err:S(t)},"Failed to re-apply rules"),null}}updateRules(e){this.compiledRules=e,this.logger.info({rules:e.length},"Inference rules updated")}}class 
ke{debounceMs;concurrency;rateLimitPerMinute;started=!1;active=0;debounceTimers=new Map;latestByKey=new Map;normalQueue=[];lowQueue=[];tokens;lastRefillMs=Date.now();drainWaiters=[];constructor(e){this.debounceMs=e.debounceMs,this.concurrency=e.concurrency,this.rateLimitPerMinute=e.rateLimitPerMinute,this.tokens=this.rateLimitPerMinute??Number.POSITIVE_INFINITY}enqueue(e,t){const i=`${e.priority}:${e.path}`;this.latestByKey.set(i,{event:e,fn:t});const r=this.debounceTimers.get(i);r&&clearTimeout(r);const o=setTimeout((()=>{this.debounceTimers.delete(i);const e=this.latestByKey.get(i);e&&(this.latestByKey.delete(i),this.push(e),this.pump())}),this.debounceMs);this.debounceTimers.set(i,o)}process(){this.started=!0,this.pump()}async drain(){this.isIdle()||await new Promise((e=>{this.drainWaiters.push(e)}))}push(e){"low"===e.event.priority?this.lowQueue.push(e):this.normalQueue.push(e)}refillTokens(e){if(void 0===this.rateLimitPerMinute)return;const t=Math.max(0,e-this.lastRefillMs)*(this.rateLimitPerMinute/6e4);this.tokens=Math.min(this.rateLimitPerMinute,this.tokens+t),this.lastRefillMs=e}takeToken(){const e=Date.now();return this.refillTokens(e),!(this.tokens<1)&&(this.tokens-=1,!0)}nextItem(){return this.normalQueue.shift()??this.lowQueue.shift()}pump(){if(this.started){for(;this.active<this.concurrency;){const e=this.nextItem();if(!e)break;if(!this.takeToken()){"low"===e.event.priority?this.lowQueue.unshift(e):this.normalQueue.unshift(e),setTimeout((()=>{this.pump()}),250);break}this.active+=1,Promise.resolve().then((()=>e.fn(e.event))).finally((()=>{this.active-=1,this.pump(),this.maybeResolveDrain()}))}this.maybeResolveDrain()}}isIdle(){return 0===this.active&&0===this.normalQueue.length&&0===this.lowQueue.length&&0===this.debounceTimers.size&&0===this.latestByKey.size}maybeResolveDrain(){if(!this.isIdle())return;const e=this.drainWaiters;this.drainWaiters=[];for(const t of e)t()}}class xe{client;collectionName;dims;log;constructor(e,t,i){this.client=new 
b.QdrantClient({url:e.url,apiKey:e.apiKey,checkCompatibility:!1}),this.collectionName=e.collectionName,this.dims=t,this.log=Z(i)}async ensureCollection(){try{const e=await this.client.getCollections();e.collections.some((e=>e.name===this.collectionName))||await this.client.createCollection(this.collectionName,{vectors:{size:this.dims,distance:"Cosine"}})}catch(e){throw new Error(`Failed to ensure collection "${this.collectionName}": ${String(e)}`)}}async upsert(e){0!==e.length&&await te((async t=>{t>1&&this.log.warn({attempt:t,operation:"qdrant.upsert",points:e.length},"Retrying Qdrant upsert"),await this.client.upsert(this.collectionName,{wait:!0,points:e.map((e=>({id:e.id,vector:e.vector,payload:e.payload})))})}),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:e,delayMs:t,error:i})=>{this.log.warn({attempt:e,delayMs:t,operation:"qdrant.upsert",err:S(i)},"Qdrant upsert failed; will retry")}})}async delete(e){0!==e.length&&await te((async t=>{t>1&&this.log.warn({attempt:t,operation:"qdrant.delete",ids:e.length},"Retrying Qdrant delete"),await this.client.delete(this.collectionName,{wait:!0,points:e})}),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:e,delayMs:t,error:i})=>{this.log.warn({attempt:e,delayMs:t,operation:"qdrant.delete",err:S(i)},"Qdrant delete failed; will retry")}})}async setPayload(e,t){0!==e.length&&await this.client.setPayload(this.collectionName,{wait:!0,points:e,payload:t})}async getPayload(e){try{const t=await this.client.retrieve(this.collectionName,{ids:[e],with_payload:!0,with_vector:!1});return 0===t.length?null:t[0].payload}catch{return null}}async search(e,t,i){return(await this.client.search(this.collectionName,{vector:e,limit:t,with_payload:!0,...i?{filter:i}:{}})).map((e=>({id:String(e.id),score:e.score,payload:e.payload})))}async*scroll(e,t=100){let i;for(;;){const r=await this.client.scroll(this.collectionName,{limit:t,with_payload:!0,with_vector:!1,...e?{filter:e}:{},...void 
0!==i?{offset:i}:{}});for(const e of r.points)yield{id:String(e.id),payload:e.payload};const o=r.next_page_offset;if(null==o)break;if("string"!=typeof o&&"number"!=typeof o)break;i=o}}}class ze{config;queue;processor;logger;watcher;constructor(e,t,i,r){this.config=e,this.queue=t,this.processor=i,this.logger=r}start(){this.watcher=v.watch(this.config.paths,{ignored:this.config.ignored,usePolling:this.config.usePolling,interval:this.config.pollIntervalMs,awaitWriteFinish:!!this.config.stabilityThresholdMs&&{stabilityThreshold:this.config.stabilityThresholdMs},ignoreInitial:!1}),this.watcher.on("add",(e=>{this.logger.debug({path:e},"File added"),this.queue.enqueue({type:"create",path:e,priority:"normal"},(()=>this.processor.processFile(e)))})),this.watcher.on("change",(e=>{this.logger.debug({path:e},"File changed"),this.queue.enqueue({type:"modify",path:e,priority:"normal"},(()=>this.processor.processFile(e)))})),this.watcher.on("unlink",(e=>{this.logger.debug({path:e},"File removed"),this.queue.enqueue({type:"delete",path:e,priority:"normal"},(()=>this.processor.deleteFile(e)))})),this.watcher.on("error",(e=>{this.logger.error({err:S(e)},"Watcher error")})),this.queue.process(),this.logger.info({paths:this.config.paths},"Filesystem watcher started")}async stop(){this.watcher&&(await this.watcher.close(),this.watcher=void 0,this.logger.info("Filesystem watcher stopped"))}}class je{options;watcher;debounce;constructor(e){this.options=e}start(){this.options.enabled&&(this.watcher=v.watch(this.options.configPath,{ignoreInitial:!0}),this.watcher.on("change",(()=>{this.debounce&&clearTimeout(this.debounce),this.debounce=setTimeout((()=>{this.options.onChange()}),this.options.debounceMs)})),this.watcher.on("error",(e=>{this.options.logger.error({err:S(e)},"Config watcher error")})),this.options.logger.info({configPath:this.options.configPath,debounceMs:this.options.debounceMs},"Config watcher started"))}async 
stop(){this.debounce&&(clearTimeout(this.debounce),this.debounce=void 0),this.watcher&&(await this.watcher.close(),this.watcher=void 0)}}const Ce={loadConfig:Y,createLogger:oe,createEmbeddingProvider:re,createVectorStoreClient:(e,t,i)=>new xe(e,t,i),compileRules:be,createDocumentProcessor:(e,t,i,r,o)=>new Se(e,t,i,r,o),createEventQueue:e=>new ke(e),createFileSystemWatcher:(e,t,i,r)=>new ze(e,t,i,r),createApiServer:I};class Fe{config;configPath;factories;logger;watcher;queue;server;processor;configWatcher;constructor(e,t,i={}){this.config=e,this.configPath=t,this.factories={...Ce,...i}}async start(){const e=this.factories.createLogger(this.config.logging);let t;this.logger=e;try{t=this.factories.createEmbeddingProvider(this.config.embedding,e)}catch(t){throw e.fatal({err:S(t)},"Failed to create embedding provider"),t}const i=this.factories.createVectorStoreClient(this.config.vectorStore,t.dimensions,e);await i.ensureCollection();const r=this.factories.compileRules(this.config.inferenceRules??[]),o={metadataDir:this.config.metadataDir??".jeeves-metadata",chunkSize:this.config.embedding.chunkSize,chunkOverlap:this.config.embedding.chunkOverlap,maps:this.config.maps},n=this.factories.createDocumentProcessor(o,t,i,r,e);this.processor=n;const s=this.factories.createEventQueue({debounceMs:this.config.watch.debounceMs??2e3,concurrency:this.config.embedding.concurrency??5,rateLimitPerMinute:this.config.embedding.rateLimitPerMinute});this.queue=s;const a=this.factories.createFileSystemWatcher(this.config.watch,s,n,e);this.watcher=a;const c=this.factories.createApiServer({processor:n,vectorStore:i,embeddingProvider:t,queue:s,config:this.config,logger:e});this.server=c,await c.listen({host:this.config.api?.host??"127.0.0.1",port:this.config.api?.port??3456}),a.start(),this.startConfigWatch(),e.info("jeeves-watcher started")}async stop(){if(await this.stopConfigWatch(),this.watcher&&await this.watcher.stop(),this.queue){const e=this.config.shutdownTimeoutMs??1e4;await 
Promise.race([this.queue.drain().then((()=>!0)),new Promise((t=>{setTimeout((()=>{t(!1)}),e)}))])||this.logger?.warn({timeoutMs:e},"Queue drain timeout hit, forcing shutdown")}this.server&&await this.server.close(),this.logger?.info("jeeves-watcher stopped")}startConfigWatch(){const e=this.logger;if(!e)return;const t=this.config.configWatch?.enabled??!0;if(!t)return;if(!this.configPath)return void e.debug("Config watch enabled, but no config path was provided");const i=this.config.configWatch?.debounceMs??1e4;this.configWatcher=new je({configPath:this.configPath,enabled:t,debounceMs:i,logger:e,onChange:async()=>this.reloadConfig()}),this.configWatcher.start()}async stopConfigWatch(){this.configWatcher&&(await this.configWatcher.stop(),this.configWatcher=void 0)}async reloadConfig(){const e=this.logger,t=this.processor;if(e&&t&&this.configPath){e.info({configPath:this.configPath},"Config change detected, reloading...");try{const i=await this.factories.loadConfig(this.configPath);this.config=i;const r=this.factories.compileRules(i.inferenceRules??[]);t.updateRules(r),e.info({configPath:this.configPath,rules:r.length},"Config reloaded")}catch(t){e.error({err:S(t)},"Failed to reload config")}}}}e.DocumentProcessor=Se,e.EventQueue=ke,e.FileSystemWatcher=ze,e.JeevesWatcher=Fe,e.VectorStoreClient=xe,e.apiConfigSchema=K,e.applyRules=ye,e.buildAttributes=we,e.compileRules=be,e.configWatchConfigSchema=O,e.contentHash=ne,e.createApiServer=I,e.createEmbeddingProvider=re,e.createLogger=oe,e.deleteMetadata=T,e.embeddingConfigSchema=Q,e.extractText=fe,e.inferenceRuleSchema=G,e.jeevesWatcherConfigSchema=U,e.loadConfig=Y,e.loggingConfigSchema=J,e.metadataPath=C,e.pointId=ae,e.readMetadata=F,e.startFromConfig=async function(e){const t=await Y(e),i=new Fe(t,e);return function(e){const t=async()=>{await e(),process.exit(0)};process.on("SIGTERM",(()=>{t()})),process.on("SIGINT",(()=>{t()}))}((()=>i.stop())),await 
i.start(),i},e.vectorStoreConfigSchema=$,e.watchConfigSchema=L,e.writeMetadata=R}(this["jeeves-watcher"]=this["jeeves-watcher"]||{},Fastify,promises,node_path,picomatch,radash,node_crypto,cosmiconfig,zod,jsonmap,googleGenai,pino,uuid,cheerio,yaml,mammoth,Ajv,addFormats,textsplitters,jsClientRest,chokidar);
|
package/dist/mjs/index.js
CHANGED
|
@@ -656,15 +656,13 @@ const ENV_PATTERN = /\$\{([^}]+)\}/g;
|
|
|
656
656
|
* Replace `${VAR_NAME}` patterns in a string with `process.env.VAR_NAME`.
|
|
657
657
|
*
|
|
658
658
|
* @param value - The string to process.
|
|
659
|
-
* @returns The string with env vars
|
|
660
|
-
* @throws If a referenced env var is not set.
|
|
659
|
+
* @returns The string with resolved env vars; unresolvable expressions left untouched.
|
|
661
660
|
*/
|
|
662
661
|
function substituteString(value) {
|
|
663
662
|
return value.replace(ENV_PATTERN, (match, varName) => {
|
|
664
663
|
const envValue = process.env[varName];
|
|
665
|
-
if (envValue === undefined)
|
|
666
|
-
|
|
667
|
-
}
|
|
664
|
+
if (envValue === undefined)
|
|
665
|
+
return match;
|
|
668
666
|
return envValue;
|
|
669
667
|
});
|
|
670
668
|
}
|
package/package.json
CHANGED