pi-voice-input 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +6 -4
- package/CONTRIBUTING.md +4 -4
- package/README.md +3 -3
- package/extensions/index.ts +1 -0
- package/extensions/voice-input.ts +40 -3
- package/package.json +2 -2
package/AGENTS.md
CHANGED
|
@@ -7,7 +7,8 @@ Development workflow for this repo.
|
|
|
7
7
|
- Package: `pi-voice-input`
|
|
8
8
|
- GitHub: `git@github.com:tr-nc/pi-voice-input.git`
|
|
9
9
|
- npm: `pi-voice-input`
|
|
10
|
-
-
|
|
10
|
+
- Package entrypoint: `extensions/index.ts`
|
|
11
|
+
- Main extension implementation: `extensions/voice-input.ts`
|
|
11
12
|
- Current provider: VolcEngine WebSocket ASR only
|
|
12
13
|
- Provider architecture should remain extensible so more ASR providers can be added later.
|
|
13
14
|
|
|
@@ -25,8 +26,8 @@ Run from the repo root:
|
|
|
25
26
|
|
|
26
27
|
```bash
|
|
27
28
|
npm install --package-lock=false
|
|
28
|
-
npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/voice-input.ts
|
|
29
|
-
PI_OFFLINE=1 pi -e
|
|
29
|
+
npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/index.ts
|
|
30
|
+
PI_OFFLINE=1 pi -e . --list-models
|
|
30
31
|
npm pack --dry-run
|
|
31
32
|
```
|
|
32
33
|
|
|
@@ -37,6 +38,7 @@ AGENTS.md
|
|
|
37
38
|
CONTRIBUTING.md
|
|
38
39
|
README.md
|
|
39
40
|
ROADMAP.md
|
|
41
|
+
extensions/index.ts
|
|
40
42
|
extensions/voice-input.ts
|
|
41
43
|
package.json
|
|
42
44
|
```
|
|
@@ -90,7 +92,7 @@ PI_OFFLINE=1 pi --list-models
|
|
|
90
92
|
If testing a local checkout instead of the npm package, use:
|
|
91
93
|
|
|
92
94
|
```bash
|
|
93
|
-
pi -e
|
|
95
|
+
pi -e .
|
|
94
96
|
```
|
|
95
97
|
|
|
96
98
|
Do not leave local development wrappers in `~/.pi/agent/extensions/voice-input.ts` when validating the npm installation path.
|
package/CONTRIBUTING.md
CHANGED
|
@@ -41,10 +41,10 @@ cd pi-voice-input
|
|
|
41
41
|
npm install
|
|
42
42
|
```
|
|
43
43
|
|
|
44
|
-
Run the
|
|
44
|
+
Run the package directly from the checkout:
|
|
45
45
|
|
|
46
46
|
```bash
|
|
47
|
-
pi -e
|
|
47
|
+
pi -e .
|
|
48
48
|
```
|
|
49
49
|
|
|
50
50
|
Or install the local checkout into pi while developing:
|
|
@@ -82,8 +82,8 @@ Do not commit this config file or any secrets.
|
|
|
82
82
|
Before opening a pull request, run:
|
|
83
83
|
|
|
84
84
|
```bash
|
|
85
|
-
npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/voice-input.ts
|
|
86
|
-
PI_OFFLINE=1 pi -e
|
|
85
|
+
npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/index.ts
|
|
86
|
+
PI_OFFLINE=1 pi -e . --list-models
|
|
87
87
|
npm pack --dry-run
|
|
88
88
|
git diff --check
|
|
89
89
|
```
|
package/README.md
CHANGED
|
@@ -21,7 +21,7 @@ No Python, `uv`, or upload service is required for normal shortcut usage. On mac
|
|
|
21
21
|
## Architecture
|
|
22
22
|
|
|
23
23
|
```text
|
|
24
|
-
pi extension: extensions/voice-input.ts
|
|
24
|
+
pi extension: extensions/index.ts → extensions/voice-input.ts
|
|
25
25
|
├─ registers Ctrl+Shift+R and /voice commands
|
|
26
26
|
├─ starts/stops a local recorder process
|
|
27
27
|
│ ├─ Linux preferred: pw-record
|
|
@@ -164,10 +164,10 @@ cd pi-voice-input
|
|
|
164
164
|
npm install
|
|
165
165
|
```
|
|
166
166
|
|
|
167
|
-
Run directly
|
|
167
|
+
Run directly from the package checkout:
|
|
168
168
|
|
|
169
169
|
```bash
|
|
170
|
-
pi -e
|
|
170
|
+
pi -e .
|
|
171
171
|
```
|
|
172
172
|
|
|
173
173
|
Or install the local checkout while developing:
|
|
package/extensions/index.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { default } from "./voice-input.js";
|
|
package/extensions/voice-input.ts
CHANGED
|
@@ -40,7 +40,9 @@ Rules:
|
|
|
40
40
|
- Preserve the user's information faithfully. Do not over-summarize or compress. Do not delete constraints, examples, numbers, filenames, errors, multiple requests, ordering, or emphasis.
|
|
41
41
|
- Correct obvious ASR mistakes, homophones, segmentation, and punctuation. Preserve code identifiers, commands, paths, URLs, model names, package names, and proper nouns.
|
|
42
42
|
- If the user self-corrects, keep only the corrected intent and remove the false start, correction process, filler, and chatter. Do not lose any other substantive information.
|
|
43
|
-
- Make the output complete relative to the raw speech, logically clear, and actionable
|
|
43
|
+
- Make the output complete relative to the raw speech, logically clear, and actionable, but do not drop raw-speech information or repeat existing draft text.
|
|
44
|
+
- Preserve the raw speech layout. If the raw speech is a single line, output a single line unless the user explicitly dictates line breaks or another multiline layout, for example by saying "new line" or "换行".
|
|
45
|
+
- Do not introduce line breaks, bullets, numbered lists, tables, or code fences merely to improve style.
|
|
44
46
|
- Do not invent requirements that the raw speech did not express. If uncertain, keep the original meaning and express it more clearly.
|
|
45
47
|
- The output language must match the primary language of the raw speech, not the context language and not this English prompt. Do not translate just because the instructions are in English.`;
|
|
46
48
|
|
|
@@ -924,7 +926,18 @@ function resolvePostprocessModel(ctx: ExtensionContext, reference: string): Mode
|
|
|
924
926
|
}
|
|
925
927
|
|
|
926
928
|
function extractAssistantText(message: { content: unknown }): string {
|
|
927
|
-
|
|
929
|
+
const content = message.content;
|
|
930
|
+
if (typeof content === "string") return content.trim();
|
|
931
|
+
if (!Array.isArray(content)) return "";
|
|
932
|
+
return content
|
|
933
|
+
.map((part) => {
|
|
934
|
+
if (!part || typeof part !== "object") return "";
|
|
935
|
+
const block = part as { type?: unknown; text?: unknown };
|
|
936
|
+
if (block.type === "text" && typeof block.text === "string") return block.text;
|
|
937
|
+
return "";
|
|
938
|
+
})
|
|
939
|
+
.join("")
|
|
940
|
+
.trim();
|
|
928
941
|
}
|
|
929
942
|
|
|
930
943
|
function cleanPostprocessOutput(output: string): string {
|
|
@@ -935,6 +948,27 @@ function cleanPostprocessOutput(output: string): string {
|
|
|
935
948
|
return text;
|
|
936
949
|
}
|
|
937
950
|
|
|
951
|
+
function rawTextRequestsMultiline(rawText: string): boolean {
|
|
952
|
+
return (
|
|
953
|
+
/\r|\n/.test(rawText) ||
|
|
954
|
+
/\b(?:new\s*line|newline|line break|next line|new paragraph|paragraph break|carriage return|press enter|separate lines?|multi[- ]line|multiple lines)\b/i.test(rawText) ||
|
|
955
|
+
/(?:换行|新的一行|另起一行|下一行|回车|分行|多行|逐行|每行|空一行|新段落|另起一段|分段)/u.test(rawText)
|
|
956
|
+
);
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
function collapseUnexpectedLineBreaks(text: string): string {
|
|
960
|
+
return text
|
|
961
|
+
.replace(/\r\n?/g, "\n")
|
|
962
|
+
.replace(/[ \t\f\v]*\n+[ \t\f\v]*/g, " ")
|
|
963
|
+
.replace(/[ \t\f\v]{2,}/g, " ")
|
|
964
|
+
.trim();
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
function preserveExpectedPostprocessLayout(rawText: string, output: string): string {
|
|
968
|
+
if (rawTextRequestsMultiline(rawText)) return output.trim();
|
|
969
|
+
return collapseUnexpectedLineBreaks(output);
|
|
970
|
+
}
|
|
971
|
+
|
|
938
972
|
function removeEditorDraftEcho(editorText: string, output: string): string {
|
|
939
973
|
const draft = editorText.trim();
|
|
940
974
|
const text = output.trim();
|
|
@@ -981,6 +1015,7 @@ function buildPostprocessPrompt(ctx: ExtensionContext, rawText: string, config:
|
|
|
981
1015
|
"IMPORTANT: your output will be pasted verbatim at the current cursor position. It is not a replacement and not a rewrite of the whole editor draft.",
|
|
982
1016
|
"The current editor draft is context only. Do not rewrite, repeat, complete, delete, or replace existing draft text. Do not output the full sentence after insertion.",
|
|
983
1017
|
"The true cursor position is not marked in the draft shown here; the pi editor owns the actual insertion point. Do not guess the cursor and synthesize a full surrounding sentence.",
|
|
1018
|
+
"Preserve layout: if the raw ASR text is one line, output one line unless the user explicitly dictated line breaks or another multiline layout.",
|
|
984
1019
|
"If the raw speech is an inline insertion, continuation, a few words, or a phrase, output only the newly spoken words or phrase.",
|
|
985
1020
|
"Example: draft is `Please make this function async and [cursor].`, raw speech is `add error handling`, correct output is `add error handling`, not `Please make this function async and add error handling.`.",
|
|
986
1021
|
"Example: draft is `This variable name is [cursor]unclear`, raw speech is `still`, correct output is `still`, not `This variable name is still unclear`.",
|
|
@@ -1038,7 +1073,9 @@ async function postprocessTranscript(ctx: ExtensionContext, rawText: string, con
|
|
|
1038
1073
|
}
|
|
1039
1074
|
|
|
1040
1075
|
const polished = cleanPostprocessOutput(extractAssistantText(response));
|
|
1041
|
-
|
|
1076
|
+
if (!polished) return rawText;
|
|
1077
|
+
const insertion = removeEditorDraftEcho(getFullEditorText(ctx), polished);
|
|
1078
|
+
return preserveExpectedPostprocessLayout(raw, insertion) || rawText;
|
|
1042
1079
|
}
|
|
1043
1080
|
|
|
1044
1081
|
function insertIntoEditor(ctx: ExtensionContext, text: string) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-voice-input",
|
|
3
|
-
"version": "0.2.9",
|
|
3
|
+
"version": "0.2.11",
|
|
4
4
|
"description": "Press Ctrl+Shift+R to dictate prompts into Pi using VolcEngine ASR",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"pi": {
|
|
36
36
|
"extensions": [
|
|
37
|
-
"extensions"
|
|
37
|
+
"extensions/index.ts"
|
|
38
38
|
]
|
|
39
39
|
},
|
|
40
40
|
"dependencies": {
|