pi-voice-input 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -7,7 +7,8 @@ Development workflow for this repo.
7
7
  - Package: `pi-voice-input`
8
8
  - GitHub: `git@github.com:tr-nc/pi-voice-input.git`
9
9
  - npm: `pi-voice-input`
10
- - Main extension: `extensions/voice-input.ts`
10
+ - Package entrypoint: `extensions/index.ts`
11
+ - Main extension implementation: `extensions/voice-input.ts`
11
12
  - Current provider: VolcEngine WebSocket ASR only
12
13
  - Provider architecture should remain extensible so more ASR providers can be added later.
13
14
 
@@ -25,8 +26,8 @@ Run from the repo root:
25
26
 
26
27
  ```bash
27
28
  npm install --package-lock=false
28
- npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/voice-input.ts
29
- PI_OFFLINE=1 pi -e ./extensions/voice-input.ts --list-models
29
+ npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/index.ts
30
+ PI_OFFLINE=1 pi -e . --list-models
30
31
  npm pack --dry-run
31
32
  ```
32
33
 
@@ -37,6 +38,7 @@ AGENTS.md
37
38
  CONTRIBUTING.md
38
39
  README.md
39
40
  ROADMAP.md
41
+ extensions/index.ts
40
42
  extensions/voice-input.ts
41
43
  package.json
42
44
  ```
@@ -90,7 +92,7 @@ PI_OFFLINE=1 pi --list-models
90
92
  If testing a local checkout instead of the npm package, use:
91
93
 
92
94
  ```bash
93
- pi -e ./extensions/voice-input.ts
95
+ pi -e .
94
96
  ```
95
97
 
96
98
  Do not leave local development wrappers in `~/.pi/agent/extensions/voice-input.ts` when validating the npm installation path.
package/CONTRIBUTING.md CHANGED
@@ -41,10 +41,10 @@ cd pi-voice-input
41
41
  npm install
42
42
  ```
43
43
 
44
- Run the extension directly from the checkout:
44
+ Run the package directly from the checkout:
45
45
 
46
46
  ```bash
47
- pi -e ./extensions/voice-input.ts
47
+ pi -e .
48
48
  ```
49
49
 
50
50
  Or install the local checkout into pi while developing:
@@ -82,8 +82,8 @@ Do not commit this config file or any secrets.
82
82
  Before opening a pull request, run:
83
83
 
84
84
  ```bash
85
- npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/voice-input.ts
86
- PI_OFFLINE=1 pi -e ./extensions/voice-input.ts --list-models
85
+ npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/index.ts
86
+ PI_OFFLINE=1 pi -e . --list-models
87
87
  npm pack --dry-run
88
88
  git diff --check
89
89
  ```
package/README.md CHANGED
@@ -21,7 +21,7 @@ No Python, `uv`, or upload service is required for normal shortcut usage. On mac
21
21
  ## Architecture
22
22
 
23
23
  ```text
24
- pi extension: extensions/voice-input.ts
24
+ pi extension: extensions/index.ts → extensions/voice-input.ts
25
25
  ├─ registers Ctrl+Shift+R and /voice commands
26
26
  ├─ starts/stops a local recorder process
27
27
  │ ├─ Linux preferred: pw-record
@@ -164,10 +164,10 @@ cd pi-voice-input
164
164
  npm install
165
165
  ```
166
166
 
167
- Run directly without installing the package:
167
+ Run directly from the package checkout:
168
168
 
169
169
  ```bash
170
- pi -e ./extensions/voice-input.ts
170
+ pi -e .
171
171
  ```
172
172
 
173
173
  Or install the local checkout while developing:
@@ -0,0 +1 @@
1
+ export { default } from "./voice-input.js";
@@ -40,7 +40,9 @@ Rules:
40
40
  - Preserve the user's information faithfully. Do not over-summarize or compress. Do not delete constraints, examples, numbers, filenames, errors, multiple requests, ordering, or emphasis.
41
41
  - Correct obvious ASR mistakes, homophones, segmentation, and punctuation. Preserve code identifiers, commands, paths, URLs, model names, package names, and proper nouns.
42
42
  - If the user self-corrects, keep only the corrected intent and remove the false start, correction process, filler, and chatter. Do not lose any other substantive information.
43
- - Make the output complete relative to the raw speech, logically clear, and actionable. Split into items or steps when helpful, but do not drop raw-speech information or repeat existing draft text.
43
+ - Make the output complete relative to the raw speech, logically clear, and actionable, but do not drop raw-speech information or repeat existing draft text.
44
+ - Preserve the raw speech layout. If the raw speech is a single line, output a single line unless the user explicitly dictates line breaks or another multiline layout, for example by saying "new line" or "换行".
45
+ - Do not introduce line breaks, bullets, numbered lists, tables, or code fences merely to improve style.
44
46
  - Do not invent requirements that the raw speech did not express. If uncertain, keep the original meaning and express it more clearly.
45
47
  - The output language must match the primary language of the raw speech, not the context language and not this English prompt. Do not translate just because the instructions are in English.`;
46
48
 
@@ -924,7 +926,18 @@ function resolvePostprocessModel(ctx: ExtensionContext, reference: string): Mode
924
926
  }
925
927
 
926
928
  function extractAssistantText(message: { content: unknown }): string {
927
- return textFromContent(message.content).trim();
929
+ const content = message.content;
930
+ if (typeof content === "string") return content.trim();
931
+ if (!Array.isArray(content)) return "";
932
+ return content
933
+ .map((part) => {
934
+ if (!part || typeof part !== "object") return "";
935
+ const block = part as { type?: unknown; text?: unknown };
936
+ if (block.type === "text" && typeof block.text === "string") return block.text;
937
+ return "";
938
+ })
939
+ .join("")
940
+ .trim();
928
941
  }
929
942
 
930
943
  function cleanPostprocessOutput(output: string): string {
@@ -935,6 +948,27 @@ function cleanPostprocessOutput(output: string): string {
935
948
  return text;
936
949
  }
937
950
 
951
+ function rawTextRequestsMultiline(rawText: string): boolean {
952
+ return (
953
+ /\r|\n/.test(rawText) ||
954
+ /\b(?:new\s*line|newline|line break|next line|new paragraph|paragraph break|carriage return|press enter|separate lines?|multi[- ]line|multiple lines)\b/i.test(rawText) ||
955
+ /(?:换行|新的一行|另起一行|下一行|回车|分行|多行|逐行|每行|空一行|新段落|另起一段|分段)/u.test(rawText)
956
+ );
957
+ }
958
+
959
+ function collapseUnexpectedLineBreaks(text: string): string {
960
+ return text
961
+ .replace(/\r\n?/g, "\n")
962
+ .replace(/[ \t\f\v]*\n+[ \t\f\v]*/g, " ")
963
+ .replace(/[ \t\f\v]{2,}/g, " ")
964
+ .trim();
965
+ }
966
+
967
+ function preserveExpectedPostprocessLayout(rawText: string, output: string): string {
968
+ if (rawTextRequestsMultiline(rawText)) return output.trim();
969
+ return collapseUnexpectedLineBreaks(output);
970
+ }
971
+
938
972
  function removeEditorDraftEcho(editorText: string, output: string): string {
939
973
  const draft = editorText.trim();
940
974
  const text = output.trim();
@@ -981,6 +1015,7 @@ function buildPostprocessPrompt(ctx: ExtensionContext, rawText: string, config:
981
1015
  "IMPORTANT: your output will be pasted verbatim at the current cursor position. It is not a replacement and not a rewrite of the whole editor draft.",
982
1016
  "The current editor draft is context only. Do not rewrite, repeat, complete, delete, or replace existing draft text. Do not output the full sentence after insertion.",
983
1017
  "The true cursor position is not marked in the draft shown here; the pi editor owns the actual insertion point. Do not guess the cursor and synthesize a full surrounding sentence.",
1018
+ "Preserve layout: if the raw ASR text is one line, output one line unless the user explicitly dictated line breaks or another multiline layout.",
984
1019
  "If the raw speech is an inline insertion, continuation, a few words, or a phrase, output only the newly spoken words or phrase.",
985
1020
  "Example: draft is `Please make this function async and [cursor].`, raw speech is `add error handling`, correct output is `add error handling`, not `Please make this function async and add error handling.`.",
986
1021
  "Example: draft is `This variable name is [cursor]unclear`, raw speech is `still`, correct output is `still`, not `This variable name is still unclear`.",
@@ -1038,7 +1073,9 @@ async function postprocessTranscript(ctx: ExtensionContext, rawText: string, con
1038
1073
  }
1039
1074
 
1040
1075
  const polished = cleanPostprocessOutput(extractAssistantText(response));
1041
- return polished ? removeEditorDraftEcho(getFullEditorText(ctx), polished) : rawText;
1076
+ if (!polished) return rawText;
1077
+ const insertion = removeEditorDraftEcho(getFullEditorText(ctx), polished);
1078
+ return preserveExpectedPostprocessLayout(raw, insertion) || rawText;
1042
1079
  }
1043
1080
 
1044
1081
  function insertIntoEditor(ctx: ExtensionContext, text: string) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-voice-input",
3
- "version": "0.2.9",
3
+ "version": "0.2.11",
4
4
  "description": "Press Ctrl+Shift+R to dictate prompts into Pi using VolcEngine ASR",
5
5
  "type": "module",
6
6
  "keywords": [
@@ -34,7 +34,7 @@
34
34
  ],
35
35
  "pi": {
36
36
  "extensions": [
37
- "extensions"
37
+ "extensions/index.ts"
38
38
  ]
39
39
  },
40
40
  "dependencies": {