listpage-next-ai 0.0.272 → 0.0.274
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.d.ts +6 -0
- package/dist/cjs/index.js +183 -1
- package/package.json +1 -1
package/dist/cjs/index.d.ts
CHANGED
|
@@ -18,11 +18,13 @@ export declare interface AgentOptions {
|
|
|
18
18
|
database?: boolean;
|
|
19
19
|
knowledge?: boolean;
|
|
20
20
|
word?: boolean;
|
|
21
|
+
documentSearch?: boolean;
|
|
21
22
|
websearch?: boolean;
|
|
22
23
|
};
|
|
23
24
|
databaseOptions?: DatabaseAgentOptions;
|
|
24
25
|
knowledgeOptions?: KnowledgeAgentOptions;
|
|
25
26
|
wordOptions?: WordToolOptions;
|
|
27
|
+
documentSearchOptions?: DocumentSearchToolOptions;
|
|
26
28
|
websearchOptions?: {};
|
|
27
29
|
model: LanguageModelLike;
|
|
28
30
|
system_prompt: string;
|
|
@@ -94,6 +96,10 @@ declare interface DatabaseToolOptions {
|
|
|
94
96
|
}) => Promise<string>;
|
|
95
97
|
}
|
|
96
98
|
|
|
99
|
+
/**
 * Options for the document-search tool, supplied through
 * `AgentOptions.documentSearchOptions`.
 */
declare interface DocumentSearchToolOptions {
|
|
100
|
+
/** Markdown text that the document-search tool indexes and searches. */
markdown: string;
|
|
101
|
+
}
|
|
102
|
+
|
|
97
103
|
/**
 * Options accepted as `AgentOptions.knowledgeOptions`. This declaration adds
 * no members of its own; everything is inherited from `KnowledgeToolOptions`.
 */
declare interface KnowledgeAgentOptions extends KnowledgeToolOptions {
|
|
98
104
|
}
|
|
99
105
|
|
package/dist/cjs/index.js
CHANGED
|
@@ -437,8 +437,189 @@ function createWordTools(options) {
|
|
|
437
437
|
})
|
|
438
438
|
];
|
|
439
439
|
}
|
|
440
|
+
/**
 * Splits a Markdown string into sections at the configured ATX header levels.
 *
 * Every emitted section has the shape `{ pageContent, metadata }`:
 * - `metadata` maps the configured header names to the titles of the headers
 *   that enclose the section;
 * - `pageContent` is the section body, prefixed with a
 *   "Start of the section: A > B" breadcrumb built from those titles.
 *
 * Sections whose body is empty or whitespace-only are not emitted.
 */
class SimpleMarkdownHeaderTextSplitter {
    /**
     * @param options Splitter configuration. `options.headersToSplitOn` is an
     *   array of `[hashes, name]` pairs (for example `['##', 'h2']`); `name`
     *   becomes the metadata key for headers written with exactly `hashes`.
     */
    constructor(options) {
        this.headersToSplitOn = options.headersToSplitOn;
    }

    /**
     * Splits `text` into header-delimited sections.
     *
     * Lines inside fenced code blocks (``` or ~~~) are never treated as
     * headers, so a `# comment` inside a code sample stays part of its section.
     *
     * @param text Markdown source.
     * @returns Array of `{ pageContent, metadata }` sections in document order.
     */
    splitText(text) {
        const headerMap = new Map(this.headersToSplitOn);
        const docs = [];
        let currentContent = [];
        let currentMetadata = {};
        // Marker (e.g. "```") of the fence we are currently inside, or null.
        let openFence = null;

        // Emits the section accumulated so far, unless it has no visible text.
        const flushSection = () => {
            if (!currentContent.some((l) => l.trim().length > 0)) return;
            docs.push({
                pageContent: (this.getHeaderContext(currentMetadata) + currentContent.join('\n')).trim(),
                metadata: {
                    ...currentMetadata
                }
            });
        };

        for (const line of text.split('\n')) {
            const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})/);
            if (fenceMatch) {
                const marker = fenceMatch[1];
                if (openFence === null) {
                    openFence = marker;
                } else if (marker[0] === openFence[0] && marker.length >= openFence.length && line.trim() === marker) {
                    // A closing fence uses the same character, is at least as
                    // long as the opener, and carries no info string.
                    openFence = null;
                }
            }

            // Header detection is suspended while a fence is open.
            const headerMatch = openFence === null ? line.match(/^(#{1,6}) (.*)/) : null;
            if (headerMatch && headerMap.has(headerMatch[1])) {
                const hashes = headerMatch[1];
                flushSection();
                // Keep only the titles of shallower headers; deeper or equal
                // levels are reset by the new header.
                const nextMetadata = {};
                for (const [h, name] of this.headersToSplitOn) {
                    if (h.length < hashes.length && currentMetadata[name]) nextMetadata[name] = currentMetadata[name];
                }
                nextMetadata[headerMap.get(hashes)] = headerMatch[2].trim();
                currentMetadata = nextMetadata;
                currentContent = [];
                continue;
            }
            currentContent.push(line);
        }
        flushSection();
        return docs;
    }

    /**
     * Builds the breadcrumb prefix for a section.
     *
     * The titles are read using the configured header names, ordered from the
     * shallowest header level to the deepest.
     *
     * @param metadata Header-name to title map of the section.
     * @returns `"Start of the section: A > B\n\n"`, or `''` when the section
     *   has no enclosing header.
     */
    getHeaderContext(metadata) {
        const headers = [...this.headersToSplitOn]
            .sort((a, b) => a[0].length - b[0].length)
            .map(([, name]) => metadata[name])
            .filter(Boolean);
        if (0 === headers.length) return '';
        return `Start of the section: ${headers.join(' > ')}\n\n`;
    }
}
|
|
503
|
+
/**
 * Tokenizes text for keyword matching.
 *
 * Each character in the range U+4E00..U+9FA5 becomes its own token; each run
 * of ASCII letters/digits becomes one token. ASCII tokens are lower-cased so
 * that matching is case-insensitive ("Markdown" and "markdown" produce the
 * same token). All other characters act as separators.
 *
 * @param text Text to tokenize.
 * @returns Array of tokens; empty for empty or non-string input.
 */
function simpleTokenize(text) {
    if (typeof text !== 'string') return [];
    const regex = /[\u4e00-\u9fa5]|[a-zA-Z0-9]+/g;
    const matches = text.match(regex) || [];
    // Lower-case after matching so token boundaries stay exactly as the
    // pattern defines them.
    return matches.map((token) => token.toLowerCase());
}
|
|
507
|
+
/**
 * In-memory keyword search over Markdown sections.
 *
 * Markdown is split into header-delimited sections, each section is tokenized
 * with `simpleTokenize`, and a `BM25` instance built from the token lists is
 * used to score sections against a query.
 */
class DocumentSearchEngine {
    constructor() {
        // Indexed sections, `{ pageContent, metadata }`, in insertion order.
        this.documents = [];
        // Token list for each entry of `documents` (same indices).
        this.tokenizedDocs = [];
        // Scorer built over `tokenizedDocs`; null until something is indexed.
        this.bm25 = null;
    }

    /**
     * Splits `markdown` on `#`..`####` headers, appends the resulting sections
     * to the index and rebuilds the scorer over all indexed sections.
     *
     * @param markdown Markdown source to add to the index.
     * @returns Promise that resolves once the index is updated.
     * @throws {TypeError} If `markdown` is not a string.
     */
    async loadAndIndex(markdown) {
        if (typeof markdown !== 'string') {
            throw new TypeError('DocumentSearchEngine.loadAndIndex expects the Markdown source as a string');
        }
        const headersToSplitOn = [
            ['#', 'h1'],
            ['##', 'h2'],
            ['###', 'h3'],
            ['####', 'h4']
        ];
        const splitter = new SimpleMarkdownHeaderTextSplitter({
            headersToSplitOn
        });
        const newDocs = splitter.splitText(markdown).map((doc) => ({
            pageContent: doc.pageContent,
            metadata: doc.metadata
        }));
        // Nothing was added, so the indexed sections (and any scorer built
        // from them) are unchanged.
        if (0 === newDocs.length) return;
        const newTokens = newDocs.map((doc) => simpleTokenize(doc.pageContent));
        this.documents = [
            ...this.documents,
            ...newDocs
        ];
        this.tokenizedDocs = [
            ...this.tokenizedDocs,
            ...newTokens
        ];
        this.bm25 = new BM25(this.tokenizedDocs);
    }

    /**
     * Returns the best-scoring sections for `query`.
     *
     * @param query Search text; tokenized with `simpleTokenize`.
     * @param options Optional settings. `options.k` is the maximum number of
     *   results (default 3). Fractions are rounded down, negative values are
     *   treated as 0, and non-numeric values fall back to the default.
     * @returns Array of `{ document, score }` sorted by descending score;
     *   sections with a score of 0 or less are omitted. Empty when nothing is
     *   indexed.
     */
    search(query, options = {}) {
        if (!this.bm25 || 0 === this.documents.length) return [];
        const requested = options ? options.k : void 0;
        // A negative limit would otherwise reach `slice(0, k)` and drop items
        // from the end instead of limiting the result count.
        const limit = 'number' === typeof requested && !Number.isNaN(requested) ? Math.max(0, Math.floor(requested)) : 3;
        const queryTokens = simpleTokenize(query);
        const scores = this.documents.map((_, index) => ({
            index,
            score: this.bm25.getScore(queryTokens, index)
        }));
        return scores
            .filter((item) => item.score > 0)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit)
            .map(({ index, score }) => ({
                document: this.documents[index],
                score
            }));
    }

    /**
     * @returns The engine's current array of indexed sections (not a copy).
     */
    getDocuments() {
        return this.documents;
    }

    /** Removes every indexed section and discards the scorer. */
    clear() {
        this.documents = [];
        this.tokenizedDocs = [];
        this.bm25 = null;
    }
}
|
|
579
|
+
/**
 * Creates the `search_local_documents` tool, which searches a Markdown
 * document supplied by the caller.
 *
 * The document is indexed lazily on the first tool call; concurrent first
 * calls share one indexing run. If indexing fails, the partial index is
 * discarded and the next call tries again.
 *
 * @param options Tool options; `options.markdown` is the Markdown text to
 *   index.
 * @returns Array containing the single search tool.
 * @throws {TypeError} If `options` is missing.
 */
function createDocumentSearchTools(options) {
    if (null == options) {
        throw new TypeError('createDocumentSearchTools: options are required (set documentSearchOptions when features.documentSearch is enabled)');
    }
    const { markdown } = options;
    const engine = new DocumentSearchEngine();
    let isIndexed = false;
    let indexingPromise = null;
    const ensureIndexed = async () => {
        if (isIndexed) return;
        if (!indexingPromise) {
            const pending = engine.loadAndIndex(markdown).then(() => {
                isIndexed = true;
            }, (error) => {
                // Drop anything a failed run may have appended so that a
                // retry does not index the same sections twice.
                engine.clear();
                throw error;
            });
            indexingPromise = pending;
            // Forget the settled promise (success or failure) so a rejection
            // is not replayed to every later call.
            const release = () => {
                if (indexingPromise === pending) indexingPromise = null;
            };
            pending.then(release, release);
        }
        await indexingPromise;
    };
    return [
        (0, external_langchain_namespaceObject.tool)(async ({ query }) => {
            try {
                if ('string' !== typeof query || 0 === query.trim().length) {
                    return '检索过程中发生错误: 缺少检索关键词 query';
                }
                await ensureIndexed();
                const results = engine.search(query);
                if (0 === results.length) return '未找到相关文档内容。';
                return results.map((res, i) => {
                    const metadataStr = Object.entries(res.document.metadata).map(([key, value]) => `${key}: ${value}`).join(', ');
                    return `[结果 ${i + 1}] (得分: ${res.score.toFixed(4)})\n元数据: { ${metadataStr} }\n内容: ${res.document.pageContent}\n`;
                }).join('\n---\n');
            } catch (error) {
                // Errors are reported back as the tool result instead of
                // propagating out of the tool call.
                console.error('Document search failed:', error);
                return `检索过程中发生错误: ${error instanceof Error ? error.message : String(error)}`;
            }
        }, {
            name: 'search_local_documents',
            description: '当需要回答关于用户提供文档的问题时使用此工具。该工具会基于 BM25 算法在用户提供的 Markdown 文档中检索相关片段。',
            schema: {
                type: 'object',
                properties: {
                    query: {
                        type: 'string',
                        description: '检索关键词或问题'
                    }
                },
                // The handler cannot search without a query.
                required: [
                    'query'
                ]
            }
        })
    ];
}
|
|
440
621
|
function createReactAgent(options) {
|
|
441
|
-
const { name, model, system_prompt, tools = [], middleware = [], features, databaseOptions, knowledgeOptions, wordOptions, inject_current_time, max_iterations = 0, simple = false } = options;
|
|
622
|
+
const { name, model, system_prompt, tools = [], middleware = [], features, databaseOptions, knowledgeOptions, wordOptions, documentSearchOptions, inject_current_time, max_iterations = 0, simple = false } = options;
|
|
442
623
|
const agent = (0, external_langchain_namespaceObject.createAgent)({
|
|
443
624
|
name,
|
|
444
625
|
model,
|
|
@@ -447,6 +628,7 @@ function createReactAgent(options) {
|
|
|
447
628
|
...features.database ? createDatabaseTools(databaseOptions) : [],
|
|
448
629
|
...features.knowledge ? createKnowledgeTools(knowledgeOptions) : [],
|
|
449
630
|
...features.word ? createWordTools(wordOptions) : [],
|
|
631
|
+
...features.documentSearch ? createDocumentSearchTools(documentSearchOptions) : [],
|
|
450
632
|
...tools
|
|
451
633
|
].filter(Boolean),
|
|
452
634
|
middleware: [
|