npm - pi-voice-input - Versions diffs - 0.2.9 → 0.2.11 - Mend

pi-voice-input 0.2.9 → 0.2.11

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/AGENTS.md +6 -4
package/CONTRIBUTING.md +4 -4
package/README.md +3 -3
package/extensions/index.ts +1 -0
package/extensions/voice-input.ts +40 -3
package/package.json +2 -2

package/AGENTS.md CHANGED Viewed

@@ -7,7 +7,8 @@ Development workflow for this repo.
 - Package: `pi-voice-input`
 - GitHub: `git@github.com:tr-nc/pi-voice-input.git`
 - npm: `pi-voice-input`
-- Main extension: `extensions/voice-input.ts`
+- Package entrypoint: `extensions/index.ts`
+- Main extension implementation: `extensions/voice-input.ts`
 - Current provider: VolcEngine WebSocket ASR only
 - Provider architecture should remain extensible so more ASR providers can be added later.
@@ -25,8 +26,8 @@ Run from the repo root:
 ```bash
 npm install --package-lock=false
-npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/voice-input.ts
-PI_OFFLINE=1 pi -e ./extensions/voice-input.ts --list-models
+npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/index.ts
+PI_OFFLINE=1 pi -e . --list-models
 npm pack --dry-run
 ```
@@ -37,6 +38,7 @@ AGENTS.md
 CONTRIBUTING.md
 README.md
 ROADMAP.md
+extensions/index.ts
 extensions/voice-input.ts
 package.json
 ```
@@ -90,7 +92,7 @@ PI_OFFLINE=1 pi --list-models
 If testing a local checkout instead of the npm package, use:
 ```bash
-pi -e ./extensions/voice-input.ts
+pi -e .
 ```
 Do not leave local development wrappers in `~/.pi/agent/extensions/voice-input.ts` when validating the npm installation path.

package/CONTRIBUTING.md CHANGED Viewed

@@ -41,10 +41,10 @@ cd pi-voice-input
 npm install
 ```
-Run the extension directly from the checkout:
+Run the package directly from the checkout:
 ```bash
-pi -e ./extensions/voice-input.ts
+pi -e .
 ```
 Or install the local checkout into pi while developing:
@@ -82,8 +82,8 @@ Do not commit this config file or any secrets.
 Before opening a pull request, run:
 ```bash
-npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/voice-input.ts
-PI_OFFLINE=1 pi -e ./extensions/voice-input.ts --list-models
+npx tsc --noEmit --module NodeNext --moduleResolution NodeNext --target ES2022 --skipLibCheck --types node extensions/index.ts
+PI_OFFLINE=1 pi -e . --list-models
 npm pack --dry-run
 git diff --check
 ```

package/README.md CHANGED Viewed

@@ -21,7 +21,7 @@ No Python, `uv`, or upload service is required for normal shortcut usage. On mac
 ## Architecture
 ```text
-pi extension: extensions/voice-input.ts
+pi extension: extensions/index.ts → extensions/voice-input.ts
   ├─ registers Ctrl+Shift+R and /voice commands
   ├─ starts/stops a local recorder process
   │    ├─ Linux preferred: pw-record
@@ -164,10 +164,10 @@ cd pi-voice-input
 npm install
 ```
-Run directly without installing the package:
+Run directly from the package checkout:
 ```bash
-pi -e ./extensions/voice-input.ts
+pi -e .
 ```
 Or install the local checkout while developing:

package/extensions/index.ts ADDED Viewed

@@ -0,0 +1 @@
+export { default } from "./voice-input.js";

package/extensions/voice-input.ts CHANGED Viewed

@@ -40,7 +40,9 @@ Rules:
 - Preserve the user's information faithfully. Do not over-summarize or compress. Do not delete constraints, examples, numbers, filenames, errors, multiple requests, ordering, or emphasis.
 - Correct obvious ASR mistakes, homophones, segmentation, and punctuation. Preserve code identifiers, commands, paths, URLs, model names, package names, and proper nouns.
 - If the user self-corrects, keep only the corrected intent and remove the false start, correction process, filler, and chatter. Do not lose any other substantive information.
-- Make the output complete relative to the raw speech, logically clear, and actionable. Split into items or steps when helpful, but do not drop raw-speech information or repeat existing draft text.
+- Make the output complete relative to the raw speech, logically clear, and actionable, but do not drop raw-speech information or repeat existing draft text.
+- Preserve the raw speech layout. If the raw speech is a single line, output a single line unless the user explicitly dictates line breaks or another multiline layout, for example by saying "new line" or "换行".
+- Do not introduce line breaks, bullets, numbered lists, tables, or code fences merely to improve style.
 - Do not invent requirements that the raw speech did not express. If uncertain, keep the original meaning and express it more clearly.
 - The output language must match the primary language of the raw speech, not the context language and not this English prompt. Do not translate just because the instructions are in English.`;
@@ -924,7 +926,18 @@ function resolvePostprocessModel(ctx: ExtensionContext, reference: string): Mode
 }
 function extractAssistantText(message: { content: unknown }): string {
-  return textFromContent(message.content).trim();
+  const content = message.content;
+  if (typeof content === "string") return content.trim();
+  if (!Array.isArray(content)) return "";
+  return content
+    .map((part) => {
+      if (!part || typeof part !== "object") return "";
+      const block = part as { type?: unknown; text?: unknown };
+      if (block.type === "text" && typeof block.text === "string") return block.text;
+      return "";
+    })
+    .join("")
+    .trim();
 }
 function cleanPostprocessOutput(output: string): string {
@@ -935,6 +948,27 @@ function cleanPostprocessOutput(output: string): string {
   return text;
 }
+function rawTextRequestsMultiline(rawText: string): boolean {
+  return (
+    /\r|\n/.test(rawText) ||
+    /\b(?:new\s*line|newline|line break|next line|new paragraph|paragraph break|carriage return|press enter|separate lines?|multi[- ]line|multiple lines)\b/i.test(rawText) ||
+    /(?:换行|新的一行|另起一行|下一行|回车|分行|多行|逐行|每行|空一行|新段落|另起一段|分段)/u.test(rawText)
+  );
+}
+function collapseUnexpectedLineBreaks(text: string): string {
+  return text
+    .replace(/\r\n?/g, "\n")
+    .replace(/[ \t\f\v]*\n+[ \t\f\v]*/g, " ")
+    .replace(/[ \t\f\v]{2,}/g, " ")
+    .trim();
+}
+function preserveExpectedPostprocessLayout(rawText: string, output: string): string {
+  if (rawTextRequestsMultiline(rawText)) return output.trim();
+  return collapseUnexpectedLineBreaks(output);
+}
 function removeEditorDraftEcho(editorText: string, output: string): string {
   const draft = editorText.trim();
   const text = output.trim();
@@ -981,6 +1015,7 @@ function buildPostprocessPrompt(ctx: ExtensionContext, rawText: string, config:
     "IMPORTANT: your output will be pasted verbatim at the current cursor position. It is not a replacement and not a rewrite of the whole editor draft.",
     "The current editor draft is context only. Do not rewrite, repeat, complete, delete, or replace existing draft text. Do not output the full sentence after insertion.",
     "The true cursor position is not marked in the draft shown here; the pi editor owns the actual insertion point. Do not guess the cursor and synthesize a full surrounding sentence.",
+    "Preserve layout: if the raw ASR text is one line, output one line unless the user explicitly dictated line breaks or another multiline layout.",
     "If the raw speech is an inline insertion, continuation, a few words, or a phrase, output only the newly spoken words or phrase.",
     "Example: draft is `Please make this function async and [cursor].`, raw speech is `add error handling`, correct output is `add error handling`, not `Please make this function async and add error handling.`.",
     "Example: draft is `This variable name is [cursor]unclear`, raw speech is `still`, correct output is `still`, not `This variable name is still unclear`.",
@@ -1038,7 +1073,9 @@ async function postprocessTranscript(ctx: ExtensionContext, rawText: string, con
   }
   const polished = cleanPostprocessOutput(extractAssistantText(response));
-  return polished ? removeEditorDraftEcho(getFullEditorText(ctx), polished) : rawText;
+  if (!polished) return rawText;
+  const insertion = removeEditorDraftEcho(getFullEditorText(ctx), polished);
+  return preserveExpectedPostprocessLayout(raw, insertion) || rawText;
 }
 function insertIntoEditor(ctx: ExtensionContext, text: string) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-voice-input",
-  "version": "0.2.9",
+  "version": "0.2.11",
   "description": "Press Ctrl+Shift+R to dictate prompts into Pi using VolcEngine ASR",
   "type": "module",
   "keywords": [
@@ -34,7 +34,7 @@
   ],
   "pi": {
     "extensions": [
-      "extensions"
+      "extensions/index.ts"
     ]
   },
   "dependencies": {