pi-doc-injector 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/cache.ts +79 -0
- package/commands.ts +68 -1
- package/config.ts +63 -28
- package/globber.ts +48 -0
- package/index.ts +171 -22
- package/injector.ts +18 -1
- package/keyword-gen.ts +142 -0
- package/keyword-llm.ts +57 -0
- package/matcher.ts +14 -10
- package/package.json +5 -1
- package/picomatch.d.ts +11 -0
- package/registry.ts +361 -72
- package/types.ts +62 -3
package/keyword-gen.ts
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local keyword generation — extracts keywords from filenames and content
|
|
3
|
+
* when no frontmatter is available.
|
|
4
|
+
*
|
|
5
|
+
* Extraction sources:
|
|
6
|
+
* 1. Filename parts (split on -, _, .)
|
|
7
|
+
* 2. Markdown headings (# Title, ## Title, etc.)
|
|
8
|
+
* 3. Code symbols (function, class, const, interface, type, enum)
|
|
9
|
+
*
|
|
10
|
+
* All keywords are lowercased, deduplicated, and filtered through a stop-word list.
|
|
11
|
+
* Output is capped at 20 keywords.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
 * Words that carry no topical meaning and are never emitted as keywords.
 * All entries are lowercase; candidates are lowercased before lookup.
 */
const STOP_WORDS: ReadonlySet<string> = new Set<string>([
  // Articles
  "a", "an", "the",
  // Pronouns
  "i", "you", "he", "she", "it", "we", "they",
  "me", "him", "her", "us", "them",
  "my", "your", "his", "its", "our", "their",
  "this", "that", "these", "those",
  "who", "whom", "whose", "which", "what",
  // Prepositions
  "in", "on", "at", "by", "for", "with", "about",
  "to", "from", "of", "into", "onto", "upon",
  "over", "under", "between", "among", "through",
  "during", "before", "after", "above", "below",
  "up", "down", "out", "off",
  // Conjunctions
  "and", "but", "or", "nor", "so", "yet",
  "if", "then", "than", "as", "when", "while",
  "because", "since", "although", "though",
  // Auxiliary/modal verbs
  "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "having",
  "do", "does", "did", "doing",
  "will", "would", "shall", "should", "can", "could",
  "may", "might", "must",
  // Common adverbs
  "not", "no", "yes",
  "just", "only", "also", "too", "very", "now",
  "here", "there", "where", "how", "why",
  "all", "each", "every", "both", "few", "more", "most",
  "some", "any", "other", "another", "such",
  "much", "many", "little", "less",
  // Common content-less words
  "get", "set", "put", "use", "make", "see", "need",
  "one", "two", "three", "first", "second", "third",
  "using", "used", "new",
  "note", "notes", "example", "examples", "todo",
]);

/** Maximum number of keywords returned by `generateKeywords`. */
const MAX_KEYWORDS = 20;

/** Candidates shorter than this many characters are discarded. */
const MIN_KEYWORD_LENGTH = 3;

/**
 * A markdown ATX heading: 1-6 `#` markers, then spaces/tabs, then the text.
 * The separator is `[ \t]+` rather than `\s+` so it cannot run across a line
 * break and capture the following paragraph when the heading itself is empty.
 */
const HEADING_PATTERN = /^#{1,6}[ \t]+(.+)$/gm;

/**
 * A declaration keyword followed by the declared identifier. The leading `\b`
 * stops the keyword from matching inside a longer word ("prototype", "subclass").
 */
const SYMBOL_PATTERN =
  /\b(?:export\s+)?(?:async\s+)?(?:function|class|const|interface|type|enum)\s+(\w+)/g;

/**
 * Lowercase a token and drop every character that is not a letter or digit.
 * Unicode-aware, so accented and non-Latin words survive intact.
 */
function stripToAlphanumeric(token: string): string {
  return token.toLowerCase().replace(/[^\p{L}\p{N}]/gu, "");
}

/** Append `candidate` (lowercased) to `out` unless it is too short or a stop word. */
function pushIfKeyword(candidate: string, out: string[]): void {
  const keyword = candidate.toLowerCase();
  if (keyword.length >= MIN_KEYWORD_LENGTH && !STOP_WORDS.has(keyword)) {
    out.push(keyword);
  }
}

/**
 * Generate up to 20 keywords from a file's name and content.
 *
 * Candidates are collected from three sources, in this order:
 * 1. Filename parts — split on `-`, `_`, `.`, and whitespace
 * 2. Markdown headings — the words following the `#` markers
 * 3. Code symbols — function/class/const/interface/type/enum declarations
 *
 * Every candidate is lowercased, must be at least 3 characters long, and must
 * not be a stop word. The combined list is deduplicated (first occurrence
 * wins, order preserved) and then truncated to 20 entries.
 *
 * @param fileName - The basename of the file (e.g. "api-authentication.md")
 * @param content - The full file content
 * @returns Up to 20 deduplicated, lowercase keyword strings
 */
export function generateKeywords(
  fileName: string,
  content: string,
): string[] {
  const candidates: string[] = [];

  addFromFilename(fileName, candidates);
  addFromHeadings(content, candidates);
  addFromCodeSymbols(content, candidates);

  // A Set iterates in first-insertion order, so this dedupes without reordering.
  return [...new Set(candidates)].slice(0, MAX_KEYWORDS);
}

/** Extract keyword candidates from filename parts. */
function addFromFilename(fileName: string, out: string[]): void {
  // Drop only the final extension; earlier dotted segments ("foo.test.ts")
  // are separated by the split below and kept as candidates.
  const stem = fileName.replace(/\.[^.]+$/, "");

  for (const part of stem.split(/[-_.\s]+/)) {
    pushIfKeyword(stripToAlphanumeric(part), out);
  }
}

/** Extract keyword candidates from markdown headings (#, ##, ###, etc.). */
function addFromHeadings(content: string, out: string[]): void {
  // matchAll works on a private copy of the regex, so the shared pattern's
  // lastIndex is never mutated between calls.
  for (const match of content.matchAll(HEADING_PATTERN)) {
    const headingText = (match[1] ?? "").trim();
    for (const word of headingText.split(/\s+/)) {
      pushIfKeyword(stripToAlphanumeric(word), out);
    }
  }
}

/** Extract keyword candidates from code symbol declarations. */
function addFromCodeSymbols(content: string, out: string[]): void {
  for (const match of content.matchAll(SYMBOL_PATTERN)) {
    // Identifiers are kept whole (underscores included), only lowercased.
    pushIfKeyword(match[1] ?? "", out);
  }
}
|
package/keyword-llm.ts
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM Keyword Generation — builds prompts for the LLM to generate keywords
|
|
3
|
+
* for documentation files via the _doc_injector_keywords tool.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/** Input for a single file in a keyword generation batch. */
export interface FileInput {
  /** Path relative to cwd (e.g. "docs/api.md") */
  path: string;
  /** First ~500 chars of the file content as context */
  snippet: string;
  /** Existing keywords (from frontmatter/heuristic), so LLM augments not replaces */
  existingKeywords: string[];
}

/** Backslash-escape the markdown characters `*`, backtick, `[` and `]` in a path. */
function escapeMarkdownPath(path: string): string {
  return path.replace(/[*`\[\]]/g, "\\$&");
}

/** Replace line breaks with a space so a keyword cannot start a new prompt line. */
function toSingleLine(keyword: string): string {
  return keyword.replace(/[\r\n]+/g, " ");
}

/**
 * Build a user message prompt instructing the LLM to generate keywords
 * for a batch of documentation files by calling the _doc_injector_keywords tool.
 *
 * The prompt has three parts: fixed rules, a numbered list of files (escaped
 * path, optional existing keywords, fenced snippet), and one example line per
 * file showing up to five existing keywords as a quoted array.
 *
 * Existing keywords are sanitized before interpolation: line breaks are
 * collapsed in the per-file listing, and the example array quotes each keyword
 * with JSON.stringify so embedded quotes or control characters stay inside
 * their string.
 *
 * @param files - The files to describe in the prompt
 * @returns The prompt text, or an empty string when `files` is empty
 */
export function buildKeywordGenPrompt(files: FileInput[]): string {
  if (files.length === 0) return "";

  const fileDescriptions = files
    .map((file, index) => {
      const existing =
        file.existingKeywords.length > 0
          ? ` Existing keywords: ${file.existingKeywords.map(toSingleLine).join(", ")}`
          : "";
      // Triple backticks in the snippet would close the surrounding code fence.
      const safeSnippet = file.snippet.replace(/```/g, "'''");
      return `${index + 1}. **${escapeMarkdownPath(file.path)}**\n${existing}\n Snippet:\n\`\`\`\n${safeSnippet}\n\`\`\``;
    })
    .join("\n\n");

  const expectedOutput = files
    .map((file) => {
      // NOTE(review): the path shown here is markdown-escaped; if the tool
      // handler matches on the raw path, an echoed escaped key (e.g. with
      // "\[") would not match — confirm against the handler.
      const sample = file.existingKeywords
        .slice(0, 5)
        .map((keyword) => JSON.stringify(keyword))
        .join(", ");
      const ellipsis = file.existingKeywords.length > 5 ? ", ..." : "";
      return ` - "${escapeMarkdownPath(file.path)}": keywords array incorporating relevant existing keywords [${sample}${ellipsis}]`;
    })
    .join("\n");

  return `Generate documentation keywords for the following ${files.length} file(s). For each file, read the snippet and produce 3-10 concise, searchable keywords that someone might type when looking for this documentation.

Rules:
- Keywords should be lowercase, 3+ characters, no stop-words
- Incorporate any existing keywords that are still relevant
- Focus on the document's core topic, not generic terms
- Prefer specific technical terms over vague ones

Files:
${fileDescriptions}

After analysis, call the \`_doc_injector_keywords\` tool with a \`keywords\` array like:
${expectedOutput}

Do not output any other text — just call the tool with the keywords.`;
}
|
package/matcher.ts
CHANGED
|
@@ -31,6 +31,10 @@ export function extractText(content: unknown): string {
|
|
|
31
31
|
export class KeywordMatcher {
|
|
32
32
|
private options: MatcherOptions;
|
|
33
33
|
|
|
34
|
+
/**
|
|
35
|
+
* @param entries - The document entries to match against
|
|
36
|
+
* @param options - Optional matcher settings (merged with defaults)
|
|
37
|
+
*/
|
|
34
38
|
constructor(private entries: DocEntry[], options?: Partial<MatcherOptions>) {
|
|
35
39
|
this.options = { ...DEFAULT_MATCHER_OPTIONS, ...options };
|
|
36
40
|
}
|
|
@@ -44,8 +48,13 @@ export class KeywordMatcher {
|
|
|
44
48
|
for (const entry of this.entries) {
|
|
45
49
|
if (entry.injected) continue;
|
|
46
50
|
|
|
51
|
+
// Skip entries with no keywords (empty array or falsy)
|
|
52
|
+
if (!entry.keywords || entry.keywords.length === 0) continue;
|
|
53
|
+
|
|
47
54
|
const matchedKeywords: string[] = [];
|
|
48
55
|
for (const keyword of entry.keywords) {
|
|
56
|
+
// Skip empty keywords — an empty string is a substring of every text, so it would match everything
|
|
57
|
+
if (!keyword || keyword.trim().length === 0) continue;
|
|
49
58
|
if (this.keywordMatches(text, keyword)) {
|
|
50
59
|
matchedKeywords.push(keyword);
|
|
51
60
|
}
|
|
@@ -63,18 +72,13 @@ export class KeywordMatcher {
|
|
|
63
72
|
return results;
|
|
64
73
|
}
|
|
65
74
|
|
|
75
|
+
/**
|
|
76
|
+
* Check if a single keyword matches the given text.
|
|
77
|
+
* Uses simple substring inclusion (case-insensitive by default).
|
|
78
|
+
*/
|
|
66
79
|
private keywordMatches(text: string, keyword: string): boolean {
|
|
67
80
|
const search = this.options.caseSensitive ? text : text.toLowerCase();
|
|
68
81
|
const kw = this.options.caseSensitive ? keyword : keyword.toLowerCase();
|
|
69
|
-
|
|
70
|
-
if (this.options.wordBoundary) {
|
|
71
|
-
// Escape special regex chars in keyword, then apply word boundary
|
|
72
|
-
const escaped = kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
73
|
-
const flags = this.options.caseSensitive ? "" : "i";
|
|
74
|
-
const regex = new RegExp(`\\b${escaped}\\b`, flags);
|
|
75
|
-
return regex.test(search);
|
|
76
|
-
}
|
|
77
|
-
|
|
78
82
|
return search.includes(kw);
|
|
79
83
|
}
|
|
80
|
-
}
|
|
84
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-doc-injector",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Auto-inject relevant project documentation into Pi's LLM context based on keyword matching",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
},
|
|
10
10
|
"files": [
|
|
11
11
|
"*.ts",
|
|
12
|
+
"*.d.ts",
|
|
12
13
|
"docs/**/*.md",
|
|
13
14
|
"README.md"
|
|
14
15
|
],
|
|
@@ -33,6 +34,9 @@
|
|
|
33
34
|
"./index.ts"
|
|
34
35
|
]
|
|
35
36
|
},
|
|
37
|
+
"dependencies": {
|
|
38
|
+
"picomatch": "^4.0.2"
|
|
39
|
+
},
|
|
36
40
|
"peerDependencies": {
|
|
37
41
|
"@mariozechner/pi-coding-agent": "*"
|
|
38
42
|
},
|
package/picomatch.d.ts
ADDED