@echoes-io/mcp-server 4.1.1 → 6.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -185
- package/cli/index.d.ts +1 -0
- package/cli/index.d.ts.map +1 -0
- package/cli/index.js +3 -185
- package/cli/index.js.map +1 -0
- package/cli/program.d.ts +3 -0
- package/cli/program.d.ts.map +1 -0
- package/cli/program.js +151 -0
- package/cli/program.js.map +1 -0
- package/lib/constants.d.ts +8 -0
- package/lib/constants.d.ts.map +1 -0
- package/lib/constants.js +12 -0
- package/lib/constants.js.map +1 -0
- package/lib/database/index.d.ts +34 -0
- package/lib/database/index.d.ts.map +1 -0
- package/lib/database/index.js +266 -0
- package/lib/database/index.js.map +1 -0
- package/lib/database/schemas.d.ts +55 -0
- package/lib/database/schemas.d.ts.map +1 -0
- package/lib/database/schemas.js +70 -0
- package/lib/database/schemas.js.map +1 -0
- package/lib/indexer/embeddings.d.ts +6 -0
- package/lib/indexer/embeddings.d.ts.map +1 -0
- package/lib/indexer/embeddings.js +51 -0
- package/lib/indexer/embeddings.js.map +1 -0
- package/lib/indexer/extractor.d.ts +81 -0
- package/lib/indexer/extractor.d.ts.map +1 -0
- package/lib/indexer/extractor.js +68 -0
- package/lib/indexer/extractor.js.map +1 -0
- package/lib/indexer/scanner.d.ts +8 -0
- package/lib/indexer/scanner.d.ts.map +1 -0
- package/lib/indexer/scanner.js +73 -0
- package/lib/indexer/scanner.js.map +1 -0
- package/lib/indexer/tasks.d.ts +32 -0
- package/lib/indexer/tasks.d.ts.map +1 -0
- package/lib/indexer/tasks.js +228 -0
- package/lib/indexer/tasks.js.map +1 -0
- package/lib/prompts/index.d.ts +13 -0
- package/lib/prompts/index.d.ts.map +1 -0
- package/lib/prompts/index.js +153 -0
- package/lib/prompts/index.js.map +1 -0
- package/lib/server.d.ts +13 -0
- package/lib/server.d.ts.map +1 -0
- package/lib/server.js +90 -0
- package/lib/server.js.map +1 -0
- package/lib/tools/index.d.ts +14 -0
- package/lib/tools/index.d.ts.map +1 -0
- package/lib/tools/index.js +25 -0
- package/lib/tools/index.js.map +1 -0
- package/lib/tools/search.d.ts +86 -0
- package/lib/tools/search.d.ts.map +1 -0
- package/lib/tools/search.js +95 -0
- package/lib/tools/search.js.map +1 -0
- package/lib/tools/stats.d.ts +18 -0
- package/lib/tools/stats.d.ts.map +1 -0
- package/lib/tools/stats.js +62 -0
- package/lib/tools/stats.js.map +1 -0
- package/{src → lib}/tools/words-count.d.ts +6 -3
- package/lib/tools/words-count.d.ts.map +1 -0
- package/lib/tools/words-count.js +31 -0
- package/lib/tools/words-count.js.map +1 -0
- package/{src/types/frontmatter.d.ts → lib/types.d.ts} +11 -17
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +2 -0
- package/lib/types.js.map +1 -0
- package/lib/utils.d.ts +19 -0
- package/lib/utils.d.ts.map +1 -0
- package/lib/utils.js +40 -0
- package/lib/utils.js.map +1 -0
- package/package.json +60 -62
- package/src/database/index.d.ts +0 -6
- package/src/database/index.js +0 -26
- package/src/database/relations.d.ts +0 -744
- package/src/database/relations.js +0 -52
- package/src/database/schema.d.ts +0 -733
- package/src/database/schema.js +0 -69
- package/src/database/vector.d.ts +0 -25
- package/src/database/vector.js +0 -98
- package/src/index.d.ts +0 -5
- package/src/index.js +0 -5
- package/src/rag/character-ner.d.ts +0 -36
- package/src/rag/character-ner.js +0 -416
- package/src/rag/database-sync.d.ts +0 -38
- package/src/rag/database-sync.js +0 -158
- package/src/rag/embeddings.d.ts +0 -74
- package/src/rag/embeddings.js +0 -164
- package/src/rag/graph-rag.d.ts +0 -69
- package/src/rag/graph-rag.js +0 -311
- package/src/rag/hybrid-rag.d.ts +0 -109
- package/src/rag/hybrid-rag.js +0 -255
- package/src/rag/index.d.ts +0 -16
- package/src/rag/index.js +0 -33
- package/src/server.d.ts +0 -43
- package/src/server.js +0 -177
- package/src/tools/index-rag.d.ts +0 -19
- package/src/tools/index-rag.js +0 -85
- package/src/tools/index-tracker.d.ts +0 -17
- package/src/tools/index-tracker.js +0 -89
- package/src/tools/index.d.ts +0 -5
- package/src/tools/index.js +0 -5
- package/src/tools/rag-context.d.ts +0 -34
- package/src/tools/rag-context.js +0 -51
- package/src/tools/rag-search.d.ts +0 -35
- package/src/tools/rag-search.js +0 -60
- package/src/tools/words-count.js +0 -28
- package/src/types/frontmatter.js +0 -1
- package/src/utils/index.d.ts +0 -1
- package/src/utils/index.js +0 -1
- package/src/utils/markdown.d.ts +0 -6
- package/src/utils/markdown.js +0 -36
- package/src/utils/timeline-detection.d.ts +0 -13
- package/src/utils/timeline-detection.js +0 -76
package/src/database/schema.js
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import { sql } from 'drizzle-orm';
|
|
2
|
-
import { blob, integer, sqliteTable, text } from 'drizzle-orm/sqlite-core';
|
|
3
|
-
// Helpers
|
|
4
|
-
const uuidColumn = (name) => text(name)
|
|
5
|
-
.primaryKey()
|
|
6
|
-
.$default(() => crypto.randomUUID());
|
|
7
|
-
const timestampColumn = (name) => text(name).default(sql `(datetime('now'))`).notNull();
|
|
8
|
-
// Tables
|
|
9
|
-
export const timelines = sqliteTable('timelines', {
|
|
10
|
-
id: uuidColumn('id'),
|
|
11
|
-
name: text('name').notNull(),
|
|
12
|
-
description: text('description'),
|
|
13
|
-
createdAt: timestampColumn('created_at'),
|
|
14
|
-
updatedAt: timestampColumn('updated_at'),
|
|
15
|
-
});
|
|
16
|
-
export const arcs = sqliteTable('arcs', {
|
|
17
|
-
id: uuidColumn('id'),
|
|
18
|
-
timelineId: text('timeline_id')
|
|
19
|
-
.notNull()
|
|
20
|
-
.references(() => timelines.id),
|
|
21
|
-
name: text('name').notNull(),
|
|
22
|
-
slug: text('slug').notNull(),
|
|
23
|
-
description: text('description'),
|
|
24
|
-
order: integer('order').notNull(),
|
|
25
|
-
createdAt: timestampColumn('created_at'),
|
|
26
|
-
updatedAt: timestampColumn('updated_at'),
|
|
27
|
-
});
|
|
28
|
-
export const episodes = sqliteTable('episodes', {
|
|
29
|
-
id: uuidColumn('id'),
|
|
30
|
-
arcId: text('arc_id')
|
|
31
|
-
.notNull()
|
|
32
|
-
.references(() => arcs.id),
|
|
33
|
-
number: integer('number').notNull(),
|
|
34
|
-
title: text('title').notNull(),
|
|
35
|
-
slug: text('slug').notNull(),
|
|
36
|
-
description: text('description'),
|
|
37
|
-
createdAt: timestampColumn('created_at'),
|
|
38
|
-
updatedAt: timestampColumn('updated_at'),
|
|
39
|
-
});
|
|
40
|
-
export const chapters = sqliteTable('chapters', {
|
|
41
|
-
id: uuidColumn('id'),
|
|
42
|
-
episodeId: text('episode_id')
|
|
43
|
-
.notNull()
|
|
44
|
-
.references(() => episodes.id),
|
|
45
|
-
number: integer('number').notNull(),
|
|
46
|
-
part: integer('part').notNull().default(1),
|
|
47
|
-
pov: text('pov').notNull(),
|
|
48
|
-
title: text('title').notNull(),
|
|
49
|
-
summary: text('summary').notNull(),
|
|
50
|
-
location: text('location').notNull(),
|
|
51
|
-
outfit: text('outfit'),
|
|
52
|
-
kink: text('kink'),
|
|
53
|
-
date: text('date'), // Simple text field
|
|
54
|
-
stats: text('stats'), // Simple text field for JSON
|
|
55
|
-
filePath: text('file_path'),
|
|
56
|
-
createdAt: timestampColumn('created_at'),
|
|
57
|
-
updatedAt: timestampColumn('updated_at'),
|
|
58
|
-
});
|
|
59
|
-
export const embeddings = sqliteTable('embeddings', {
|
|
60
|
-
id: uuidColumn('id'),
|
|
61
|
-
chapterId: text('chapter_id')
|
|
62
|
-
.notNull()
|
|
63
|
-
.references(() => chapters.id),
|
|
64
|
-
content: text('content').notNull(),
|
|
65
|
-
embedding: blob('embedding').notNull(),
|
|
66
|
-
characters: text('characters'), // Simple text field for JSON array
|
|
67
|
-
metadata: text('metadata'), // Simple text field for JSON
|
|
68
|
-
createdAt: timestampColumn('created_at'),
|
|
69
|
-
});
|
package/src/database/vector.d.ts
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import type { DatabaseType } from './index.js';
|
|
2
|
-
export interface VectorSearchOptions {
|
|
3
|
-
characters?: string[];
|
|
4
|
-
allCharacters?: boolean;
|
|
5
|
-
arc?: string;
|
|
6
|
-
pov?: string;
|
|
7
|
-
limit?: number;
|
|
8
|
-
}
|
|
9
|
-
export interface VectorSearchResult {
|
|
10
|
-
id: string;
|
|
11
|
-
chapterId: string;
|
|
12
|
-
content: string;
|
|
13
|
-
characters: string[];
|
|
14
|
-
metadata: Record<string, unknown>;
|
|
15
|
-
similarity: number;
|
|
16
|
-
}
|
|
17
|
-
export declare class VectorStore {
|
|
18
|
-
private db;
|
|
19
|
-
constructor(db: DatabaseType);
|
|
20
|
-
insert(chapterId: string, content: string, embedding: Float32Array, characters?: string[], metadata?: Record<string, unknown>): Promise<string>;
|
|
21
|
-
search(queryEmbedding: Float32Array, options?: VectorSearchOptions): Promise<VectorSearchResult[]>;
|
|
22
|
-
private cosineSimilarity;
|
|
23
|
-
deleteByChapter(chapterId: string): Promise<void>;
|
|
24
|
-
getCharacters(character: string): Promise<string[]>;
|
|
25
|
-
}
|
package/src/database/vector.js
DELETED
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
import { eq, sql } from 'drizzle-orm';
|
|
2
|
-
import { embeddings } from './schema.js';
|
|
3
|
-
export class VectorStore {
|
|
4
|
-
db;
|
|
5
|
-
constructor(db) {
|
|
6
|
-
this.db = db;
|
|
7
|
-
}
|
|
8
|
-
async insert(chapterId, content, embedding, characters = [], metadata = {}) {
|
|
9
|
-
const id = crypto.randomUUID();
|
|
10
|
-
await this.db.insert(embeddings).values({
|
|
11
|
-
id,
|
|
12
|
-
chapterId,
|
|
13
|
-
content,
|
|
14
|
-
embedding: Buffer.from(embedding.buffer),
|
|
15
|
-
characters: JSON.stringify(characters),
|
|
16
|
-
metadata: JSON.stringify(metadata),
|
|
17
|
-
});
|
|
18
|
-
return id;
|
|
19
|
-
}
|
|
20
|
-
async search(queryEmbedding, options = {}) {
|
|
21
|
-
const { characters, allCharacters = false, limit = 10 } = options;
|
|
22
|
-
// Start with base query
|
|
23
|
-
const baseQuery = this.db
|
|
24
|
-
.select({
|
|
25
|
-
id: embeddings.id,
|
|
26
|
-
chapterId: embeddings.chapterId,
|
|
27
|
-
content: embeddings.content,
|
|
28
|
-
characters: embeddings.characters,
|
|
29
|
-
metadata: embeddings.metadata,
|
|
30
|
-
embedding: embeddings.embedding,
|
|
31
|
-
})
|
|
32
|
-
.from(embeddings);
|
|
33
|
-
// Apply character filters if needed
|
|
34
|
-
let query = baseQuery;
|
|
35
|
-
if (characters?.length) {
|
|
36
|
-
if (allCharacters) {
|
|
37
|
-
// All characters must be present (AND)
|
|
38
|
-
for (const char of characters) {
|
|
39
|
-
// @ts-expect-error - Drizzle beta type issues
|
|
40
|
-
query = query.where(sql `json_extract(${embeddings.characters}, '$') LIKE ${`%${char}%`}`);
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
else {
|
|
44
|
-
// At least one character must be present (OR)
|
|
45
|
-
const charConditions = characters.map((char) => sql `json_extract(${embeddings.characters}, '$') LIKE ${`%${char}%`}`);
|
|
46
|
-
// @ts-expect-error - Drizzle beta type issues
|
|
47
|
-
query = query.where(sql `(${sql.join(charConditions, sql ` OR `)})`);
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
const results = await query;
|
|
51
|
-
// Calculate cosine similarity in JavaScript
|
|
52
|
-
const resultsWithSimilarity = results.map((row) => {
|
|
53
|
-
const rowEmbedding = new Float32Array(row.embedding.buffer);
|
|
54
|
-
const similarity = this.cosineSimilarity(queryEmbedding, rowEmbedding);
|
|
55
|
-
return {
|
|
56
|
-
id: row.id,
|
|
57
|
-
chapterId: row.chapterId,
|
|
58
|
-
content: row.content,
|
|
59
|
-
characters: JSON.parse(row.characters || '[]'),
|
|
60
|
-
metadata: JSON.parse(row.metadata || '{}'),
|
|
61
|
-
similarity,
|
|
62
|
-
};
|
|
63
|
-
});
|
|
64
|
-
// Sort by similarity and limit
|
|
65
|
-
return resultsWithSimilarity.sort((a, b) => b.similarity - a.similarity).slice(0, limit);
|
|
66
|
-
}
|
|
67
|
-
cosineSimilarity(a, b) {
|
|
68
|
-
if (a.length !== b.length)
|
|
69
|
-
return 0;
|
|
70
|
-
let dotProduct = 0;
|
|
71
|
-
let normA = 0;
|
|
72
|
-
let normB = 0;
|
|
73
|
-
for (let i = 0; i < a.length; i++) {
|
|
74
|
-
dotProduct += a[i] * b[i];
|
|
75
|
-
normA += a[i] * a[i];
|
|
76
|
-
normB += b[i] * b[i];
|
|
77
|
-
}
|
|
78
|
-
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
|
79
|
-
}
|
|
80
|
-
async deleteByChapter(chapterId) {
|
|
81
|
-
await this.db.delete(embeddings).where(eq(embeddings.chapterId, chapterId));
|
|
82
|
-
}
|
|
83
|
-
async getCharacters(character) {
|
|
84
|
-
const results = await this.db
|
|
85
|
-
.select({ characters: embeddings.characters })
|
|
86
|
-
.from(embeddings)
|
|
87
|
-
.where(sql `json_extract(${embeddings.characters}, '$') LIKE ${`%${character}%`}`);
|
|
88
|
-
const allCharacters = new Set();
|
|
89
|
-
for (const row of results) {
|
|
90
|
-
const chars = JSON.parse(row.characters || '[]');
|
|
91
|
-
for (const char of chars) {
|
|
92
|
-
allCharacters.add(char);
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
allCharacters.delete(character); // Remove the query character
|
|
96
|
-
return Array.from(allCharacters).sort();
|
|
97
|
-
}
|
|
98
|
-
}
|
package/src/index.d.ts
DELETED
package/src/index.js
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Improved Named Entity Recognition for Character Detection
|
|
3
|
-
* Optimized for Italian storytelling content
|
|
4
|
-
*/
|
|
5
|
-
import type { ChapterMetadata } from '../types/frontmatter.js';
|
|
6
|
-
export interface CharacterNER {
|
|
7
|
-
extractCharacters(text: string, metadata?: Partial<ChapterMetadata>): string[];
|
|
8
|
-
extractMainCharacters(chapters: Array<{
|
|
9
|
-
content: string;
|
|
10
|
-
metadata: Partial<ChapterMetadata>;
|
|
11
|
-
}>): string[];
|
|
12
|
-
}
|
|
13
|
-
/**
|
|
14
|
-
* Rule-based NER optimized for Italian names and storytelling
|
|
15
|
-
*/
|
|
16
|
-
export declare class ItalianCharacterNER implements CharacterNER {
|
|
17
|
-
private commonWords;
|
|
18
|
-
private namePatterns;
|
|
19
|
-
private dialoguePatterns;
|
|
20
|
-
constructor();
|
|
21
|
-
extractCharacters(text: string, metadata?: Partial<ChapterMetadata>): string[];
|
|
22
|
-
extractMainCharacters(chapters: Array<{
|
|
23
|
-
content: string;
|
|
24
|
-
metadata: Partial<ChapterMetadata>;
|
|
25
|
-
}>): string[];
|
|
26
|
-
private extractFromOutfit;
|
|
27
|
-
private extractFromDialogue;
|
|
28
|
-
private extractFromPatterns;
|
|
29
|
-
private isValidName;
|
|
30
|
-
private isValidCharacterName;
|
|
31
|
-
private normalizeName;
|
|
32
|
-
}
|
|
33
|
-
/**
|
|
34
|
-
* Factory function to create character NER
|
|
35
|
-
*/
|
|
36
|
-
export declare function createCharacterNER(): CharacterNER;
|
package/src/rag/character-ner.js
DELETED
|
@@ -1,416 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Improved Named Entity Recognition for Character Detection
|
|
3
|
-
* Optimized for Italian storytelling content
|
|
4
|
-
*/
|
|
5
|
-
/**
|
|
6
|
-
* Rule-based NER optimized for Italian names and storytelling
|
|
7
|
-
*/
|
|
8
|
-
export class ItalianCharacterNER {
|
|
9
|
-
commonWords;
|
|
10
|
-
namePatterns;
|
|
11
|
-
dialoguePatterns;
|
|
12
|
-
constructor() {
|
|
13
|
-
// Common Italian words to exclude (expanded list)
|
|
14
|
-
this.commonWords = new Set([
|
|
15
|
-
// Pronouns and articles
|
|
16
|
-
'io',
|
|
17
|
-
'tu',
|
|
18
|
-
'lui',
|
|
19
|
-
'lei',
|
|
20
|
-
'noi',
|
|
21
|
-
'voi',
|
|
22
|
-
'loro',
|
|
23
|
-
'il',
|
|
24
|
-
'la',
|
|
25
|
-
'lo',
|
|
26
|
-
'gli',
|
|
27
|
-
'le',
|
|
28
|
-
'un',
|
|
29
|
-
'una',
|
|
30
|
-
'uno',
|
|
31
|
-
// Common verbs
|
|
32
|
-
'sono',
|
|
33
|
-
'sei',
|
|
34
|
-
'è',
|
|
35
|
-
'siamo',
|
|
36
|
-
'siete',
|
|
37
|
-
'hanno',
|
|
38
|
-
'ho',
|
|
39
|
-
'hai',
|
|
40
|
-
'ha',
|
|
41
|
-
'abbiamo',
|
|
42
|
-
'avete',
|
|
43
|
-
'fanno',
|
|
44
|
-
'faccio',
|
|
45
|
-
'fai',
|
|
46
|
-
'fa',
|
|
47
|
-
'facciamo',
|
|
48
|
-
'fate',
|
|
49
|
-
'vado',
|
|
50
|
-
'vai',
|
|
51
|
-
'va',
|
|
52
|
-
'andiamo',
|
|
53
|
-
'andate',
|
|
54
|
-
'vanno',
|
|
55
|
-
'dico',
|
|
56
|
-
'dici',
|
|
57
|
-
'dice',
|
|
58
|
-
'diciamo',
|
|
59
|
-
'dite',
|
|
60
|
-
'dicono',
|
|
61
|
-
'vedo',
|
|
62
|
-
'vedi',
|
|
63
|
-
'vede',
|
|
64
|
-
'vediamo',
|
|
65
|
-
'vedete',
|
|
66
|
-
'vedono',
|
|
67
|
-
// Common words and negations
|
|
68
|
-
'non',
|
|
69
|
-
'no',
|
|
70
|
-
'sì',
|
|
71
|
-
'si',
|
|
72
|
-
'ne',
|
|
73
|
-
'ci',
|
|
74
|
-
'vi',
|
|
75
|
-
'mi',
|
|
76
|
-
'ti',
|
|
77
|
-
'se',
|
|
78
|
-
'ma',
|
|
79
|
-
'o',
|
|
80
|
-
'e',
|
|
81
|
-
'che',
|
|
82
|
-
'di',
|
|
83
|
-
'da',
|
|
84
|
-
'in',
|
|
85
|
-
'con',
|
|
86
|
-
'su',
|
|
87
|
-
'per',
|
|
88
|
-
'tra',
|
|
89
|
-
'fra',
|
|
90
|
-
'tutto',
|
|
91
|
-
'tutti',
|
|
92
|
-
'tutte',
|
|
93
|
-
'niente',
|
|
94
|
-
'nulla',
|
|
95
|
-
'qualcosa',
|
|
96
|
-
'qualcuno',
|
|
97
|
-
'qualche',
|
|
98
|
-
'altro',
|
|
99
|
-
'altri',
|
|
100
|
-
'altre',
|
|
101
|
-
'voglio',
|
|
102
|
-
'vuoi',
|
|
103
|
-
'vuole',
|
|
104
|
-
'vogliamo',
|
|
105
|
-
'volete',
|
|
106
|
-
'vogliono',
|
|
107
|
-
'posso',
|
|
108
|
-
'puoi',
|
|
109
|
-
'può',
|
|
110
|
-
'possiamo',
|
|
111
|
-
'potete',
|
|
112
|
-
'possono',
|
|
113
|
-
'devo',
|
|
114
|
-
'devi',
|
|
115
|
-
'deve',
|
|
116
|
-
'dobbiamo',
|
|
117
|
-
'dovete',
|
|
118
|
-
'devono',
|
|
119
|
-
'perfetto',
|
|
120
|
-
'perfetta',
|
|
121
|
-
'perfetti',
|
|
122
|
-
'perfette',
|
|
123
|
-
'sento',
|
|
124
|
-
'senti',
|
|
125
|
-
'sente',
|
|
126
|
-
'sentiamo',
|
|
127
|
-
'sentite',
|
|
128
|
-
'sentono',
|
|
129
|
-
'sto',
|
|
130
|
-
'stai',
|
|
131
|
-
'sta',
|
|
132
|
-
'stiamo',
|
|
133
|
-
'state',
|
|
134
|
-
'stanno',
|
|
135
|
-
'anche',
|
|
136
|
-
'ancora',
|
|
137
|
-
'allora',
|
|
138
|
-
'quindi',
|
|
139
|
-
'però',
|
|
140
|
-
'infatti',
|
|
141
|
-
'comunque',
|
|
142
|
-
'davvero',
|
|
143
|
-
'veramente',
|
|
144
|
-
// Common adjectives/adverbs
|
|
145
|
-
'bene',
|
|
146
|
-
'male',
|
|
147
|
-
'molto',
|
|
148
|
-
'poco',
|
|
149
|
-
'tanto',
|
|
150
|
-
'più',
|
|
151
|
-
'meno',
|
|
152
|
-
'sempre',
|
|
153
|
-
'mai',
|
|
154
|
-
'già',
|
|
155
|
-
'ancora',
|
|
156
|
-
'oggi',
|
|
157
|
-
'ieri',
|
|
158
|
-
'domani',
|
|
159
|
-
'quando',
|
|
160
|
-
'dove',
|
|
161
|
-
'come',
|
|
162
|
-
'perché',
|
|
163
|
-
'perch',
|
|
164
|
-
'cosa',
|
|
165
|
-
'chi',
|
|
166
|
-
'quale',
|
|
167
|
-
'quanto',
|
|
168
|
-
'ogni',
|
|
169
|
-
'ogni',
|
|
170
|
-
'solo',
|
|
171
|
-
'prima',
|
|
172
|
-
'dopo',
|
|
173
|
-
'mentre',
|
|
174
|
-
'durante',
|
|
175
|
-
'contro',
|
|
176
|
-
'senza',
|
|
177
|
-
'dentro',
|
|
178
|
-
'fuori',
|
|
179
|
-
'sopra',
|
|
180
|
-
'sotto',
|
|
181
|
-
// Common nouns
|
|
182
|
-
'casa',
|
|
183
|
-
'tempo',
|
|
184
|
-
'giorno',
|
|
185
|
-
'notte',
|
|
186
|
-
'mattina',
|
|
187
|
-
'sera',
|
|
188
|
-
'anno',
|
|
189
|
-
'mese',
|
|
190
|
-
'settimana',
|
|
191
|
-
'ora',
|
|
192
|
-
'minuto',
|
|
193
|
-
'occhi',
|
|
194
|
-
'mano',
|
|
195
|
-
'mani',
|
|
196
|
-
'testa',
|
|
197
|
-
'cuore',
|
|
198
|
-
'mente',
|
|
199
|
-
'corpo',
|
|
200
|
-
'voce',
|
|
201
|
-
'parole',
|
|
202
|
-
'parola',
|
|
203
|
-
// Locations (generic)
|
|
204
|
-
'milano',
|
|
205
|
-
'roma',
|
|
206
|
-
'londra',
|
|
207
|
-
'italia',
|
|
208
|
-
'inghilterra',
|
|
209
|
-
'città',
|
|
210
|
-
'paese',
|
|
211
|
-
'posto',
|
|
212
|
-
'luogo',
|
|
213
|
-
// Common expressions
|
|
214
|
-
'cazzo',
|
|
215
|
-
'merda',
|
|
216
|
-
'cristo',
|
|
217
|
-
'dio',
|
|
218
|
-
'madonna',
|
|
219
|
-
'boh',
|
|
220
|
-
'ecco',
|
|
221
|
-
'allora',
|
|
222
|
-
'però',
|
|
223
|
-
'quindi',
|
|
224
|
-
'infatti',
|
|
225
|
-
'comunque',
|
|
226
|
-
'insomma',
|
|
227
|
-
'davvero',
|
|
228
|
-
'veramente',
|
|
229
|
-
'sicuramente',
|
|
230
|
-
'probabilmente',
|
|
231
|
-
// English common words (mixed content)
|
|
232
|
-
'the',
|
|
233
|
-
'and',
|
|
234
|
-
'you',
|
|
235
|
-
'that',
|
|
236
|
-
'was',
|
|
237
|
-
'for',
|
|
238
|
-
'are',
|
|
239
|
-
'with',
|
|
240
|
-
'his',
|
|
241
|
-
'they',
|
|
242
|
-
'this',
|
|
243
|
-
'have',
|
|
244
|
-
'from',
|
|
245
|
-
'not',
|
|
246
|
-
'but',
|
|
247
|
-
'what',
|
|
248
|
-
'can',
|
|
249
|
-
'out',
|
|
250
|
-
'other',
|
|
251
|
-
'were',
|
|
252
|
-
'all',
|
|
253
|
-
'your',
|
|
254
|
-
'when',
|
|
255
|
-
'said',
|
|
256
|
-
'there',
|
|
257
|
-
'each',
|
|
258
|
-
'which',
|
|
259
|
-
'she',
|
|
260
|
-
'how',
|
|
261
|
-
'will',
|
|
262
|
-
'about',
|
|
263
|
-
'get',
|
|
264
|
-
'made',
|
|
265
|
-
'may',
|
|
266
|
-
]);
|
|
267
|
-
// Patterns for Italian names
|
|
268
|
-
this.namePatterns = [
|
|
269
|
-
/\b[A-Z][a-z]{2,}(?:\s+[A-Z][a-z]{2,})?\b/g, // Capitalized words (potential names)
|
|
270
|
-
];
|
|
271
|
-
// Dialogue patterns to identify speakers
|
|
272
|
-
this.dialoguePatterns = [
|
|
273
|
-
/"([^"]*)",?\s*(?:dice|chiede|risponde|sussurra|grida)\s+([A-Z][a-z]+)/gi,
|
|
274
|
-
/([A-Z][a-z]+)\s*:\s*"([^"]*)"/gi,
|
|
275
|
-
/([A-Z][a-z]+)\s+(?:dice|chiede|risponde|sussurra|grida)\s*:/gi,
|
|
276
|
-
];
|
|
277
|
-
}
|
|
278
|
-
extractCharacters(text, metadata) {
|
|
279
|
-
const characters = new Set();
|
|
280
|
-
// 1. Add POV character if available
|
|
281
|
-
if (metadata?.pov && this.isValidName(metadata.pov)) {
|
|
282
|
-
characters.add(this.normalizeName(metadata.pov));
|
|
283
|
-
}
|
|
284
|
-
// 2. Extract from outfit metadata (reliable source)
|
|
285
|
-
if (metadata?.outfit) {
|
|
286
|
-
const outfitNames = this.extractFromOutfit(metadata.outfit);
|
|
287
|
-
for (const name of outfitNames) {
|
|
288
|
-
characters.add(name);
|
|
289
|
-
}
|
|
290
|
-
}
|
|
291
|
-
// 3. Extract from dialogue patterns (high confidence)
|
|
292
|
-
const dialogueNames = this.extractFromDialogue(text);
|
|
293
|
-
for (const name of dialogueNames) {
|
|
294
|
-
characters.add(name);
|
|
295
|
-
}
|
|
296
|
-
// 4. Extract from name patterns with frequency filtering
|
|
297
|
-
const patternNames = this.extractFromPatterns(text);
|
|
298
|
-
for (const name of patternNames) {
|
|
299
|
-
characters.add(name);
|
|
300
|
-
}
|
|
301
|
-
// 5. Filter and validate results
|
|
302
|
-
const validCharacters = Array.from(characters)
|
|
303
|
-
.filter((name) => this.isValidCharacterName(name))
|
|
304
|
-
.slice(0, 8); // Limit to 8 most likely characters
|
|
305
|
-
return validCharacters;
|
|
306
|
-
}
|
|
307
|
-
extractMainCharacters(chapters) {
|
|
308
|
-
const characterFreq = new Map();
|
|
309
|
-
const characterContexts = new Map();
|
|
310
|
-
// Extract characters from all chapters
|
|
311
|
-
chapters.forEach((chapter) => {
|
|
312
|
-
const chars = this.extractCharacters(chapter.content, chapter.metadata);
|
|
313
|
-
chars.forEach((char) => {
|
|
314
|
-
characterFreq.set(char, (characterFreq.get(char) || 0) + 1);
|
|
315
|
-
if (!characterContexts.has(char)) {
|
|
316
|
-
characterContexts.set(char, new Set());
|
|
317
|
-
}
|
|
318
|
-
characterContexts.get(char)?.add(chapter.metadata.arc || 'unknown');
|
|
319
|
-
});
|
|
320
|
-
});
|
|
321
|
-
// Score characters based on frequency and context diversity
|
|
322
|
-
const scoredCharacters = Array.from(characterFreq.entries())
|
|
323
|
-
.map(([name, freq]) => {
|
|
324
|
-
const contexts = characterContexts.get(name)?.size || 0;
|
|
325
|
-
const score = freq * (1 + contexts * 0.5); // Bonus for appearing in multiple arcs
|
|
326
|
-
return { name, freq, contexts, score };
|
|
327
|
-
})
|
|
328
|
-
.filter((char) => char.freq >= 3) // Must appear in at least 3 chapters
|
|
329
|
-
.sort((a, b) => b.score - a.score);
|
|
330
|
-
return scoredCharacters.slice(0, 20).map((char) => char.name);
|
|
331
|
-
}
|
|
332
|
-
extractFromOutfit(outfit) {
|
|
333
|
-
const names = [];
|
|
334
|
-
const matches = outfit.match(/([A-Z][a-z]+):/g);
|
|
335
|
-
if (matches) {
|
|
336
|
-
matches.forEach((match) => {
|
|
337
|
-
const name = match.replace(':', '').trim();
|
|
338
|
-
if (this.isValidName(name)) {
|
|
339
|
-
names.push(this.normalizeName(name));
|
|
340
|
-
}
|
|
341
|
-
});
|
|
342
|
-
}
|
|
343
|
-
return names;
|
|
344
|
-
}
|
|
345
|
-
extractFromDialogue(text) {
|
|
346
|
-
const names = new Set();
|
|
347
|
-
this.dialoguePatterns.forEach((pattern) => {
|
|
348
|
-
let match = pattern.exec(text);
|
|
349
|
-
while (match !== null) {
|
|
350
|
-
// Extract potential names from different capture groups
|
|
351
|
-
for (let i = 1; i < match.length; i++) {
|
|
352
|
-
const potential = match[i];
|
|
353
|
-
if (potential && this.isValidName(potential)) {
|
|
354
|
-
names.add(this.normalizeName(potential));
|
|
355
|
-
}
|
|
356
|
-
}
|
|
357
|
-
match = pattern.exec(text);
|
|
358
|
-
}
|
|
359
|
-
});
|
|
360
|
-
return Array.from(names);
|
|
361
|
-
}
|
|
362
|
-
extractFromPatterns(text) {
|
|
363
|
-
const nameFreq = new Map();
|
|
364
|
-
this.namePatterns.forEach((pattern) => {
|
|
365
|
-
let match = pattern.exec(text);
|
|
366
|
-
while (match !== null) {
|
|
367
|
-
const name = match[0].trim();
|
|
368
|
-
if (this.isValidName(name)) {
|
|
369
|
-
nameFreq.set(name, (nameFreq.get(name) || 0) + 1);
|
|
370
|
-
}
|
|
371
|
-
match = pattern.exec(text);
|
|
372
|
-
}
|
|
373
|
-
});
|
|
374
|
-
// Return names that appear multiple times
|
|
375
|
-
return Array.from(nameFreq.entries())
|
|
376
|
-
.filter(([_, freq]) => freq >= 2)
|
|
377
|
-
.map(([name, _]) => name);
|
|
378
|
-
}
|
|
379
|
-
isValidName(name) {
|
|
380
|
-
if (!name || name.length < 2 || name.length > 20)
|
|
381
|
-
return false;
|
|
382
|
-
// Must start with capital letter
|
|
383
|
-
if (name[0] !== name[0].toUpperCase())
|
|
384
|
-
return false;
|
|
385
|
-
// Check against common words
|
|
386
|
-
if (this.commonWords.has(name.toLowerCase()))
|
|
387
|
-
return false;
|
|
388
|
-
// Must contain only letters (and spaces for compound names)
|
|
389
|
-
if (!/^[A-Za-z\s]+$/.test(name))
|
|
390
|
-
return false;
|
|
391
|
-
return true;
|
|
392
|
-
}
|
|
393
|
-
isValidCharacterName(name) {
|
|
394
|
-
if (!this.isValidName(name))
|
|
395
|
-
return false;
|
|
396
|
-
// Additional checks for character names
|
|
397
|
-
// Exclude obvious non-names
|
|
398
|
-
const excludePatterns = [
|
|
399
|
-
/^(chapter|capitolo|episodio|parte|arc|timeline)$/i,
|
|
400
|
-
/^(monday|tuesday|wednesday|thursday|friday|saturday|sunday)$/i,
|
|
401
|
-
/^(gennaio|febbraio|marzo|aprile|maggio|giugno|luglio|agosto|settembre|ottobre|novembre|dicembre)$/i,
|
|
402
|
-
/^(january|february|march|april|may|june|july|august|september|october|november|december)$/i,
|
|
403
|
-
];
|
|
404
|
-
return !excludePatterns.some((pattern) => pattern.test(name));
|
|
405
|
-
}
|
|
406
|
-
normalizeName(name) {
|
|
407
|
-
// Normalize case: first letter uppercase, rest lowercase
|
|
408
|
-
return name.charAt(0).toUpperCase() + name.slice(1).toLowerCase();
|
|
409
|
-
}
|
|
410
|
-
}
|
|
411
|
-
/**
|
|
412
|
-
* Factory function to create character NER
|
|
413
|
-
*/
|
|
414
|
-
export function createCharacterNER() {
|
|
415
|
-
return new ItalianCharacterNER();
|
|
416
|
-
}
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Database synchronization utilities for HybridRAG
|
|
3
|
-
* Ensures timeline/arc/episode/chapter records exist before indexing
|
|
4
|
-
*/
|
|
5
|
-
import type { DatabaseType } from '../database/index.js';
|
|
6
|
-
export interface ChapterRecord {
|
|
7
|
-
chapterId: string;
|
|
8
|
-
timeline: string;
|
|
9
|
-
arc: string;
|
|
10
|
-
episode: number;
|
|
11
|
-
chapter: number;
|
|
12
|
-
pov: string;
|
|
13
|
-
title?: string;
|
|
14
|
-
summary?: string;
|
|
15
|
-
location?: string;
|
|
16
|
-
filePath?: string;
|
|
17
|
-
}
|
|
18
|
-
export declare class DatabaseSync {
|
|
19
|
-
private db;
|
|
20
|
-
constructor(db: DatabaseType);
|
|
21
|
-
/**
|
|
22
|
-
* Ensure all required database records exist for chapters
|
|
23
|
-
*/
|
|
24
|
-
syncChapters(chapterRecords: ChapterRecord[]): Promise<{
|
|
25
|
-
timelines: number;
|
|
26
|
-
arcs: number;
|
|
27
|
-
episodes: number;
|
|
28
|
-
chapters: number;
|
|
29
|
-
}>;
|
|
30
|
-
/**
|
|
31
|
-
* Get chapter ID from database for embedding insertion
|
|
32
|
-
*/
|
|
33
|
-
getChapterId(timeline: string, arc: string, episode: number, chapter: number): Promise<string | null>;
|
|
34
|
-
/**
|
|
35
|
-
* Clean up orphaned embeddings
|
|
36
|
-
*/
|
|
37
|
-
cleanupEmbeddings(): Promise<number>;
|
|
38
|
-
}
|