zerg-ztc 0.1.7 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/dist/App.d.ts.map +1 -1
  2. package/dist/App.js +75 -8
  3. package/dist/App.js.map +1 -1
  4. package/dist/agent/agent.d.ts +2 -0
  5. package/dist/agent/agent.d.ts.map +1 -1
  6. package/dist/agent/agent.js +111 -10
  7. package/dist/agent/agent.js.map +1 -1
  8. package/dist/agent/backends/anthropic.d.ts.map +1 -1
  9. package/dist/agent/backends/anthropic.js +15 -3
  10. package/dist/agent/backends/anthropic.js.map +1 -1
  11. package/dist/agent/backends/gemini.d.ts.map +1 -1
  12. package/dist/agent/backends/gemini.js +12 -0
  13. package/dist/agent/backends/gemini.js.map +1 -1
  14. package/dist/agent/backends/index.d.ts +1 -1
  15. package/dist/agent/backends/index.d.ts.map +1 -1
  16. package/dist/agent/backends/openai_compatible.d.ts.map +1 -1
  17. package/dist/agent/backends/openai_compatible.js +12 -0
  18. package/dist/agent/backends/openai_compatible.js.map +1 -1
  19. package/dist/agent/backends/types.d.ts +21 -1
  20. package/dist/agent/backends/types.d.ts.map +1 -1
  21. package/dist/agent/commands/dictation.d.ts +3 -0
  22. package/dist/agent/commands/dictation.d.ts.map +1 -0
  23. package/dist/agent/commands/dictation.js +10 -0
  24. package/dist/agent/commands/dictation.js.map +1 -0
  25. package/dist/agent/commands/index.d.ts.map +1 -1
  26. package/dist/agent/commands/index.js +2 -1
  27. package/dist/agent/commands/index.js.map +1 -1
  28. package/dist/agent/commands/types.d.ts +7 -0
  29. package/dist/agent/commands/types.d.ts.map +1 -1
  30. package/dist/agent/runtime/capabilities.d.ts +2 -1
  31. package/dist/agent/runtime/capabilities.d.ts.map +1 -1
  32. package/dist/agent/runtime/capabilities.js +1 -0
  33. package/dist/agent/runtime/capabilities.js.map +1 -1
  34. package/dist/agent/tools/index.d.ts +1 -0
  35. package/dist/agent/tools/index.d.ts.map +1 -1
  36. package/dist/agent/tools/index.js +6 -1
  37. package/dist/agent/tools/index.js.map +1 -1
  38. package/dist/agent/tools/screenshot.d.ts +23 -0
  39. package/dist/agent/tools/screenshot.d.ts.map +1 -0
  40. package/dist/agent/tools/screenshot.js +735 -0
  41. package/dist/agent/tools/screenshot.js.map +1 -0
  42. package/dist/components/InputArea.d.ts +1 -0
  43. package/dist/components/InputArea.d.ts.map +1 -1
  44. package/dist/components/InputArea.js +591 -43
  45. package/dist/components/InputArea.js.map +1 -1
  46. package/dist/components/SingleMessage.d.ts.map +1 -1
  47. package/dist/components/SingleMessage.js +157 -7
  48. package/dist/components/SingleMessage.js.map +1 -1
  49. package/dist/config/types.d.ts +6 -0
  50. package/dist/config/types.d.ts.map +1 -1
  51. package/dist/ui/views/status_bar.js +2 -2
  52. package/dist/ui/views/status_bar.js.map +1 -1
  53. package/dist/utils/dictation.d.ts +46 -0
  54. package/dist/utils/dictation.d.ts.map +1 -0
  55. package/dist/utils/dictation.js +409 -0
  56. package/dist/utils/dictation.js.map +1 -0
  57. package/dist/utils/dictation_native.d.ts +51 -0
  58. package/dist/utils/dictation_native.d.ts.map +1 -0
  59. package/dist/utils/dictation_native.js +216 -0
  60. package/dist/utils/dictation_native.js.map +1 -0
  61. package/dist/utils/path_complete.d.ts.map +1 -1
  62. package/dist/utils/path_complete.js +31 -6
  63. package/dist/utils/path_complete.js.map +1 -1
  64. package/dist/utils/path_format.d.ts +20 -0
  65. package/dist/utils/path_format.d.ts.map +1 -0
  66. package/dist/utils/path_format.js +90 -0
  67. package/dist/utils/path_format.js.map +1 -0
  68. package/dist/utils/table.d.ts +38 -0
  69. package/dist/utils/table.d.ts.map +1 -0
  70. package/dist/utils/table.js +133 -0
  71. package/dist/utils/table.js.map +1 -0
  72. package/dist/utils/tool_trace.d.ts +7 -2
  73. package/dist/utils/tool_trace.d.ts.map +1 -1
  74. package/dist/utils/tool_trace.js +156 -51
  75. package/dist/utils/tool_trace.js.map +1 -1
  76. package/package.json +4 -1
  77. package/packages/ztc-dictation/Cargo.toml +43 -0
  78. package/packages/ztc-dictation/README.md +65 -0
  79. package/packages/ztc-dictation/bin/.gitkeep +0 -0
  80. package/packages/ztc-dictation/index.d.ts +16 -0
  81. package/packages/ztc-dictation/index.js +74 -0
  82. package/packages/ztc-dictation/package.json +41 -0
  83. package/packages/ztc-dictation/src/main.rs +430 -0
  84. package/src/App.tsx +110 -7
  85. package/src/agent/agent.ts +116 -11
  86. package/src/agent/backends/anthropic.ts +15 -5
  87. package/src/agent/backends/gemini.ts +12 -0
  88. package/src/agent/backends/index.ts +1 -0
  89. package/src/agent/backends/openai_compatible.ts +12 -0
  90. package/src/agent/backends/types.ts +25 -1
  91. package/src/agent/commands/dictation.ts +11 -0
  92. package/src/agent/commands/index.ts +2 -0
  93. package/src/agent/commands/types.ts +8 -0
  94. package/src/agent/runtime/capabilities.ts +2 -1
  95. package/src/agent/tools/index.ts +6 -1
  96. package/src/agent/tools/screenshot.ts +821 -0
  97. package/src/components/InputArea.tsx +606 -42
  98. package/src/components/SingleMessage.tsx +248 -9
  99. package/src/config/types.ts +7 -0
  100. package/src/ui/views/status_bar.ts +2 -2
  101. package/src/utils/dictation.ts +467 -0
  102. package/src/utils/dictation_native.ts +258 -0
  103. package/src/utils/path_complete.ts +30 -4
  104. package/src/utils/path_format.ts +99 -0
  105. package/src/utils/table.ts +171 -0
  106. package/src/utils/tool_trace.ts +184 -54
@@ -9,6 +9,25 @@ import chalk from 'chalk';
9
9
  import { saveClipboardImage } from '../utils/clipboard_image.js';
10
10
  import { renderImagePreview } from '../utils/image_preview.js';
11
11
  import { completePath } from '../utils/path_complete.js';
12
+ import {
13
+ isRecording as isLegacyRecording,
14
+ startRecording as startLegacyRecording,
15
+ stopRecordingAndTranscribe as stopLegacyRecording,
16
+ cancelRecording as cancelLegacyRecording,
17
+ isDictationAvailable as isLegacyDictationAvailable
18
+ } from '../utils/dictation.js';
19
+ import {
20
+ isNativeDictationAvailable,
21
+ isNativeRecording,
22
+ startNativeRecording,
23
+ stopNativeRecording,
24
+ cancelNativeRecording
25
+ } from '../utils/dictation_native.js';
26
+
27
+ // Use native dictation if available, otherwise fall back to legacy
28
+ const useNative = isNativeDictationAvailable();
29
+ const isRecording = () => useNative ? isNativeRecording() : isLegacyRecording();
30
+ const isDictationAvailable = () => useNative ? true : isLegacyDictationAvailable();
12
31
  import {
13
32
  createEmptyState,
14
33
  insertText,
@@ -25,6 +44,30 @@ import {
25
44
  PASTE_BADGE_THRESHOLD
26
45
  } from '../ui/core/input_segments.js';
27
46
 
47
+ // Helper to check for Ctrl key combinations
48
+ // Handles multiple formats:
49
+ // 1. Ink's key.ctrl + letter
50
+ // 2. Traditional control characters (\x01 for Ctrl+A, etc.)
51
+ // 3. Kitty keyboard protocol: [<keycode>;5u where keycode is ASCII, 5 = Ctrl
52
+ function isCtrl(input: string, key: InputKey, letter: string): boolean {
53
+ const lowerLetter = letter.toLowerCase();
54
+
55
+ // Method 1: Ink's key.ctrl flag
56
+ if (key.ctrl && input === lowerLetter) return true;
57
+
58
+ // Method 2: Traditional control character (Ctrl+A = \x01, etc.)
59
+ const ctrlCode = lowerLetter.charCodeAt(0) - 96; // 'a' -> 1, 'b' -> 2, etc.
60
+ if (input === String.fromCharCode(ctrlCode)) return true;
61
+
62
+ // Method 3: Kitty keyboard protocol [<keycode>;5u (fallback, main handling is in handleKittyInput)
63
+ // keycode is ASCII code of the letter, 5 = Ctrl modifier
64
+ const asciiCode = lowerLetter.charCodeAt(0); // 'a' -> 97, 'r' -> 114, etc.
65
+ const kittyPattern = `[${asciiCode};5u`;
66
+ if (input === kittyPattern || input === `\x1b${kittyPattern}`) return true;
67
+
68
+ return false;
69
+ }
70
+
28
71
  interface InputAreaProps {
29
72
  onSubmit: (text: string) => void;
30
73
  onCommand?: (command: string, args: string[]) => void;
@@ -35,6 +78,7 @@ interface InputAreaProps {
35
78
  }>;
36
79
  onStateChange?: (state: InputState) => void;
37
80
  onToast?: (message: string) => void;
81
+ onDictationStateChange?: (state: 'idle' | 'recording' | 'transcribing') => void;
38
82
  cols?: number;
39
83
  inputBus?: InputBus;
40
84
  disabled?: boolean;
@@ -108,6 +152,7 @@ export const InputArea: React.FC<InputAreaProps> = ({
108
152
  commands = [],
109
153
  onStateChange,
110
154
  onToast,
155
+ onDictationStateChange,
111
156
  cols = process.stdout.columns || 80,
112
157
  inputBus,
113
158
  disabled = false,
@@ -121,6 +166,7 @@ export const InputArea: React.FC<InputAreaProps> = ({
121
166
  const [badgePreview, setBadgePreview] = React.useState<string[] | null>(null);
122
167
  const killRingRef = React.useRef<string[]>([]);
123
168
  const killIndexRef = React.useRef<number>(-1);
169
+ const dictationBusyRef = React.useRef<boolean>(false);
124
170
 
125
171
  // Bracketed paste mode support - buffer paste content between \x1b[200~ and \x1b[201~
126
172
  const pasteBufferRef = React.useRef<string>('');
@@ -494,6 +540,168 @@ export const InputArea: React.FC<InputAreaProps> = ({
494
540
  // Update ref so handleSubmit can access it
495
541
  handleClipboardImagePasteRef.current = handleClipboardImagePaste;
496
542
 
543
+ // Handle Kitty keyboard protocol sequences directly
544
+ // This is called when we detect a Kitty sequence in handleInput
545
+ const handleKittyInput = useCallback((char: string, key: InputKey, keycode: number, modifier: number) => {
546
+ if (disabled) return;
547
+
548
+ const isKittyCtrl = modifier === 5;
549
+ const isKittyMeta = modifier === 9;
550
+ const lowerChar = char.toLowerCase();
551
+
552
+ // Navigation
553
+ if (isKittyCtrl && lowerChar === 'a') {
554
+ dispatch({ type: 'apply', state: { cursor: { index: 0, offset: 0 } } });
555
+ return;
556
+ }
557
+ if (isKittyCtrl && lowerChar === 'e') {
558
+ dispatch({ type: 'apply', state: { cursor: { index: state.segments.length, offset: 0 } } });
559
+ return;
560
+ }
561
+ if (isKittyCtrl && lowerChar === 'b') {
562
+ dispatch({ type: 'apply', state: moveLeft(state) });
563
+ return;
564
+ }
565
+ if (isKittyCtrl && lowerChar === 'f') {
566
+ dispatch({ type: 'apply', state: moveRight(state) });
567
+ return;
568
+ }
569
+ if (isKittyCtrl && lowerChar === 'p') {
570
+ navigateHistory('up');
571
+ return;
572
+ }
573
+ if (isKittyCtrl && lowerChar === 'n') {
574
+ navigateHistory('down');
575
+ return;
576
+ }
577
+
578
+ // Kill/yank
579
+ if (isKittyCtrl && lowerChar === 'u') {
580
+ const { next, killed } = killToStart(state);
581
+ pushKill(killed);
582
+ dispatch({ type: 'apply', state: next });
583
+ return;
584
+ }
585
+ if (isKittyCtrl && lowerChar === 'k') {
586
+ const { next, killed } = killToEnd(state);
587
+ pushKill(killed);
588
+ dispatch({ type: 'apply', state: next });
589
+ return;
590
+ }
591
+ if (isKittyCtrl && lowerChar === 'y') {
592
+ dispatch({ type: 'apply', state: yank(state) });
593
+ return;
594
+ }
595
+ if (isKittyCtrl && lowerChar === 'w') {
596
+ const { next, killed } = killWordBackward(state);
597
+ pushKill(killed);
598
+ dispatch({ type: 'apply', state: next });
599
+ return;
600
+ }
601
+
602
+ // Transpose
603
+ if (isKittyCtrl && lowerChar === 't') {
604
+ dispatch({ type: 'apply', state: transposeChars(state) });
605
+ return;
606
+ }
607
+
608
+ // Delete forward
609
+ if (isKittyCtrl && lowerChar === 'd') {
610
+ dispatch({ type: 'apply', state: deleteForward(state) });
611
+ return;
612
+ }
613
+
614
+ // Push-to-talk: Ctrl+R to toggle recording
615
+ if (isKittyCtrl && lowerChar === 'r') {
616
+ if (dictationBusyRef.current) return;
617
+
618
+ if (isRecording()) {
619
+ // Stop recording and transcribe
620
+ dictationBusyRef.current = true;
621
+ onDictationStateChange?.('transcribing');
622
+
623
+ if (useNative) {
624
+ stopNativeRecording()
625
+ .then((text) => {
626
+ if (text && text.trim()) {
627
+ // Submit the transcribed text directly
628
+ onSubmit(text.trim());
629
+ } else {
630
+ onToast?.('No speech detected');
631
+ }
632
+ })
633
+ .catch((err) => {
634
+ onToast?.(`Dictation error: ${err.message}`);
635
+ })
636
+ .finally(() => {
637
+ dictationBusyRef.current = false;
638
+ onDictationStateChange?.('idle');
639
+ });
640
+ } else {
641
+ stopLegacyRecording()
642
+ .then((result) => {
643
+ if (result.text && result.text.trim()) {
644
+ // Submit the transcribed text directly
645
+ onSubmit(result.text.trim());
646
+ } else {
647
+ onToast?.('No speech detected');
648
+ }
649
+ })
650
+ .catch((err) => {
651
+ onToast?.(`Dictation error: ${err.message}`);
652
+ })
653
+ .finally(() => {
654
+ dictationBusyRef.current = false;
655
+ onDictationStateChange?.('idle');
656
+ });
657
+ }
658
+ } else {
659
+ // Start recording
660
+ if (!isDictationAvailable()) {
661
+ onToast?.('Dictation not available. Build native/ztc-audio or install sox');
662
+ return;
663
+ }
664
+ try {
665
+ if (useNative) {
666
+ startNativeRecording({ model: 'tiny' });
667
+ } else {
668
+ startLegacyRecording();
669
+ }
670
+ onDictationStateChange?.('recording');
671
+ } catch (err) {
672
+ onToast?.(`Recording error: ${err instanceof Error ? err.message : 'Unknown'}`);
673
+ }
674
+ }
675
+ return;
676
+ }
677
+
678
+ // Meta key combinations
679
+ if (isKittyMeta && lowerChar === 'y') {
680
+ dispatch({ type: 'apply', state: yankPop(state) });
681
+ return;
682
+ }
683
+ if (isKittyMeta && lowerChar === 't') {
684
+ dispatch({ type: 'apply', state: transposeWords(state) });
685
+ return;
686
+ }
687
+ if (isKittyMeta && lowerChar === 'd') {
688
+ const { next, killed } = killWordForward(state);
689
+ pushKill(killed);
690
+ dispatch({ type: 'apply', state: next });
691
+ return;
692
+ }
693
+ if (isKittyMeta && lowerChar === 'b') {
694
+ dispatch({ type: 'apply', state: moveWordLeft(state) });
695
+ return;
696
+ }
697
+ if (isKittyMeta && lowerChar === 'f') {
698
+ dispatch({ type: 'apply', state: moveWordRight(state) });
699
+ return;
700
+ }
701
+
702
+ // Unknown Kitty sequence - don't insert as text
703
+ }, [disabled, killToEnd, killToStart, killWordBackward, killWordForward, navigateHistory, onDictationStateChange, onToast, pushKill, state, transposeChars, transposeWords, yank, yankPop]);
704
+
497
705
  const handleInput = useCallback((input: string, key: InputKey) => {
498
706
  // Detect Kitty keyboard protocol CSI u sequences
499
707
  // Format: ESC [ <keycode> ; <modifiers> u
@@ -507,23 +715,36 @@ export const InputArea: React.FC<InputAreaProps> = ({
507
715
  return;
508
716
  }
509
717
 
510
- // Consume any other Kitty sequences to prevent them from being displayed
511
- // Match pattern: ESC? [ number ; number u
512
- if (/\x1b?\[\d+;\d+u/.test(input)) {
513
- // This is a Kitty keyboard sequence - don't display it as text
514
- // Extract what key it is and handle accordingly
515
- const match = input.match(/\x1b?\[(\d+);(\d+)u/);
516
- if (match) {
517
- const keycode = parseInt(match[1], 10);
518
- const modifier = parseInt(match[2], 10);
519
- // Ctrl+C (99;5) or Cmd+C (99;9) - exit the app
520
- if (keycode === 99 && (modifier === 5 || modifier === 9)) {
521
- exit();
522
- return;
523
- }
524
- // Ctrl+L (108;5) - could add clear screen here if needed
718
+ // Handle Kitty keyboard protocol sequences
719
+ // Format: ESC? [ keycode ; modifier u
720
+ // Modifier 5 = Ctrl, modifier 9 = Cmd/Super
721
+ const kittyMatch = input.match(/\x1b?\[(\d+);(\d+)u/);
722
+ if (kittyMatch) {
723
+ const keycode = parseInt(kittyMatch[1], 10);
724
+ const modifier = parseInt(kittyMatch[2], 10);
725
+ const isKittyCtrl = modifier === 5;
726
+ const isKittyMeta = modifier === 9;
727
+
728
+ // Ctrl+C (99;5) or Cmd+C (99;9) - exit the app
729
+ if (keycode === 99 && (isKittyCtrl || isKittyMeta)) {
730
+ exit();
731
+ return;
525
732
  }
526
- return; // Consume other Kitty sequences
733
+
734
+ // Synthesize key flags for Kitty sequences so downstream handlers work
735
+ const kittyKey: InputKey = {
736
+ ...key,
737
+ ctrl: isKittyCtrl || key.ctrl,
738
+ meta: isKittyMeta || key.meta,
739
+ };
740
+ // Convert keycode to character for isCtrl checks
741
+ const kittyChar = String.fromCharCode(keycode);
742
+
743
+ // Route to handlers based on keycode
744
+ // Let the normal handler flow process this with synthesized key flags
745
+ // by falling through with modified key/input
746
+ handleKittyInput(kittyChar, kittyKey, keycode, modifier);
747
+ return;
527
748
  }
528
749
 
529
750
  if (disabled) return;
@@ -683,27 +904,27 @@ export const InputArea: React.FC<InputAreaProps> = ({
683
904
  return;
684
905
  }
685
906
 
686
- if (key.ctrl && input === 'a') {
907
+ if (isCtrl(input, key, 'a')) {
687
908
  dispatch({ type: 'apply', state: { cursor: { index: 0, offset: 0 } } });
688
909
  return;
689
910
  }
690
- if (key.ctrl && input === 'e') {
911
+ if (isCtrl(input, key, 'e')) {
691
912
  dispatch({ type: 'apply', state: { cursor: { index: state.segments.length, offset: 0 } } });
692
913
  return;
693
914
  }
694
- if (key.ctrl && input === 'b') {
915
+ if (isCtrl(input, key, 'b')) {
695
916
  dispatch({ type: 'apply', state: moveLeft(state) });
696
917
  return;
697
918
  }
698
- if (key.ctrl && input === 'f') {
919
+ if (isCtrl(input, key, 'f')) {
699
920
  dispatch({ type: 'apply', state: moveRight(state) });
700
921
  return;
701
922
  }
702
- if (key.ctrl && input === 'p') {
923
+ if (isCtrl(input, key, 'p')) {
703
924
  navigateHistory('up');
704
925
  return;
705
926
  }
706
- if (key.ctrl && input === 'n') {
927
+ if (isCtrl(input, key, 'n')) {
707
928
  navigateHistory('down');
708
929
  return;
709
930
  }
@@ -715,19 +936,19 @@ export const InputArea: React.FC<InputAreaProps> = ({
715
936
  return;
716
937
  }
717
938
 
718
- if (key.ctrl && input === 'u') {
939
+ if (isCtrl(input, key, 'u')) {
719
940
  const { next, killed } = killToStart(state);
720
941
  pushKill(killed);
721
942
  dispatch({ type: 'apply', state: next });
722
943
  return;
723
944
  }
724
- if (key.ctrl && input === 'k') {
945
+ if (isCtrl(input, key, 'k')) {
725
946
  const { next, killed } = killToEnd(state);
726
947
  pushKill(killed);
727
948
  dispatch({ type: 'apply', state: next });
728
949
  return;
729
950
  }
730
- if (key.ctrl && input === 'y') {
951
+ if (isCtrl(input, key, 'y')) {
731
952
  dispatch({ type: 'apply', state: yank(state) });
732
953
  return;
733
954
  }
@@ -735,7 +956,70 @@ export const InputArea: React.FC<InputAreaProps> = ({
735
956
  dispatch({ type: 'apply', state: yankPop(state) });
736
957
  return;
737
958
  }
738
- if (key.ctrl && input === 't') {
959
+ // Push-to-talk: Ctrl+R to toggle recording
960
+ if (isCtrl(input, key, 'r')) {
961
+ if (dictationBusyRef.current) return;
962
+
963
+ if (isRecording()) {
964
+ // Stop recording and transcribe
965
+ dictationBusyRef.current = true;
966
+ onDictationStateChange?.('transcribing');
967
+
968
+ if (useNative) {
969
+ stopNativeRecording()
970
+ .then((text) => {
971
+ if (text && text.trim()) {
972
+ // Submit the transcribed text directly
973
+ onSubmit(text.trim());
974
+ } else {
975
+ onToast?.('No speech detected');
976
+ }
977
+ })
978
+ .catch((err) => {
979
+ onToast?.(`Dictation error: ${err.message}`);
980
+ })
981
+ .finally(() => {
982
+ dictationBusyRef.current = false;
983
+ onDictationStateChange?.('idle');
984
+ });
985
+ } else {
986
+ stopLegacyRecording()
987
+ .then((result) => {
988
+ if (result.text && result.text.trim()) {
989
+ // Submit the transcribed text directly
990
+ onSubmit(result.text.trim());
991
+ } else {
992
+ onToast?.('No speech detected');
993
+ }
994
+ })
995
+ .catch((err) => {
996
+ onToast?.(`Dictation error: ${err.message}`);
997
+ })
998
+ .finally(() => {
999
+ dictationBusyRef.current = false;
1000
+ onDictationStateChange?.('idle');
1001
+ });
1002
+ }
1003
+ } else {
1004
+ // Start recording
1005
+ if (!isDictationAvailable()) {
1006
+ onToast?.('Dictation not available. Build native/ztc-audio or install sox');
1007
+ return;
1008
+ }
1009
+ try {
1010
+ if (useNative) {
1011
+ startNativeRecording({ model: 'tiny' });
1012
+ } else {
1013
+ startLegacyRecording();
1014
+ }
1015
+ onDictationStateChange?.('recording');
1016
+ } catch (err) {
1017
+ onToast?.(`Recording error: ${err instanceof Error ? err.message : 'Unknown'}`);
1018
+ }
1019
+ }
1020
+ return;
1021
+ }
1022
+ if (isCtrl(input, key, 't')) {
739
1023
  dispatch({ type: 'apply', state: transposeChars(state) });
740
1024
  return;
741
1025
  }
@@ -743,12 +1027,12 @@ export const InputArea: React.FC<InputAreaProps> = ({
743
1027
  dispatch({ type: 'apply', state: transposeWords(state) });
744
1028
  return;
745
1029
  }
746
- if (key.ctrl && input === 'd') {
1030
+ if (isCtrl(input, key, 'd')) {
747
1031
  dispatch({ type: 'apply', state: deleteForward(state) });
748
1032
  return;
749
1033
  }
750
1034
 
751
- if (key.ctrl && input === 'w') {
1035
+ if (isCtrl(input, key, 'w')) {
752
1036
  const { next, killed } = killWordBackward(state);
753
1037
  pushKill(killed);
754
1038
  dispatch({ type: 'apply', state: next });
@@ -787,10 +1071,225 @@ export const InputArea: React.FC<InputAreaProps> = ({
787
1071
  const next = insertText(state, input);
788
1072
  dispatch({ type: 'apply', state: { ...next, historyIdx: -1 } });
789
1073
  }
790
- }, [disabled, exit, handleClipboardImagePaste, handleSubmit, navigateHistory, state]);
1074
+ }, [disabled, exit, handleClipboardImagePaste, handleKittyInput, handleSubmit, navigateHistory, state]);
1075
+
1076
+ // Handle Kitty keyboard protocol sequences for overlay mode
1077
+ const handleKittyOverlayInput = useCallback((char: string, key: InputKey, keycode: number, modifier: number) => {
1078
+ if (disabled) return;
1079
+
1080
+ const current = overlayStateRef.current;
1081
+ const isKittyCtrl = modifier === 5;
1082
+ const isKittyMeta = modifier === 9;
1083
+ const lowerChar = char.toLowerCase();
1084
+
1085
+ // Navigation
1086
+ if (isKittyCtrl && lowerChar === 'a') {
1087
+ overlayStateRef.current = { ...current, cursor: { index: 0, offset: 0 } };
1088
+ renderOverlay(overlayStateRef.current);
1089
+ return;
1090
+ }
1091
+ if (isKittyCtrl && lowerChar === 'e') {
1092
+ overlayStateRef.current = { ...current, cursor: { index: current.segments.length, offset: 0 } };
1093
+ renderOverlay(overlayStateRef.current);
1094
+ return;
1095
+ }
1096
+ if (isKittyCtrl && lowerChar === 'b') {
1097
+ overlayStateRef.current = moveLeft(current);
1098
+ renderOverlay(overlayStateRef.current);
1099
+ return;
1100
+ }
1101
+ if (isKittyCtrl && lowerChar === 'f') {
1102
+ overlayStateRef.current = moveRight(current);
1103
+ renderOverlay(overlayStateRef.current);
1104
+ return;
1105
+ }
1106
+ if (isKittyCtrl && lowerChar === 'p') {
1107
+ if (historyEnabled && current.history.length > 0) {
1108
+ const newIdx = current.historyIdx === -1 ? current.history.length - 1 : Math.max(0, current.historyIdx - 1);
1109
+ const historyValue = current.history[newIdx] || '';
1110
+ overlayStateRef.current = {
1111
+ ...current,
1112
+ historyIdx: newIdx,
1113
+ segments: historyValue.length > 0 ? [{ type: 'text', text: historyValue }] : [],
1114
+ cursor: { index: historyValue.length > 0 ? 0 : 0, offset: historyValue.length }
1115
+ };
1116
+ renderOverlay(overlayStateRef.current);
1117
+ }
1118
+ return;
1119
+ }
1120
+ if (isKittyCtrl && lowerChar === 'n') {
1121
+ if (historyEnabled && current.history.length > 0) {
1122
+ let newIdx = current.historyIdx === -1 ? -1 : current.historyIdx + 1;
1123
+ if (newIdx >= current.history.length) newIdx = -1;
1124
+ if (newIdx === -1) {
1125
+ overlayStateRef.current = { ...createEmptyState(), history: current.history, historyIdx: -1 };
1126
+ } else {
1127
+ const historyValue = current.history[newIdx];
1128
+ overlayStateRef.current = {
1129
+ ...current,
1130
+ historyIdx: newIdx,
1131
+ segments: historyValue.length > 0 ? [{ type: 'text', text: historyValue }] : [],
1132
+ cursor: { index: historyValue.length > 0 ? 0 : 0, offset: historyValue.length }
1133
+ };
1134
+ }
1135
+ renderOverlay(overlayStateRef.current);
1136
+ }
1137
+ return;
1138
+ }
1139
+
1140
+ // Kill/yank
1141
+ if (isKittyCtrl && lowerChar === 'u') {
1142
+ const { next, killed } = killToStart(current);
1143
+ pushKill(killed);
1144
+ overlayStateRef.current = { ...next, history: current.history, historyIdx: -1 };
1145
+ renderOverlay(overlayStateRef.current);
1146
+ return;
1147
+ }
1148
+ if (isKittyCtrl && lowerChar === 'k') {
1149
+ const { next, killed } = killToEnd(current);
1150
+ pushKill(killed);
1151
+ overlayStateRef.current = next;
1152
+ renderOverlay(overlayStateRef.current);
1153
+ return;
1154
+ }
1155
+ if (isKittyCtrl && lowerChar === 'y') {
1156
+ overlayStateRef.current = yank(current);
1157
+ renderOverlay(overlayStateRef.current);
1158
+ return;
1159
+ }
1160
+ if (isKittyCtrl && lowerChar === 'w') {
1161
+ const { next, killed } = killWordBackward(current);
1162
+ pushKill(killed);
1163
+ overlayStateRef.current = next;
1164
+ renderOverlay(overlayStateRef.current);
1165
+ return;
1166
+ }
1167
+
1168
+ // Transpose
1169
+ if (isKittyCtrl && lowerChar === 't') {
1170
+ overlayStateRef.current = transposeChars(current);
1171
+ renderOverlay(overlayStateRef.current);
1172
+ return;
1173
+ }
1174
+
1175
+ // Delete forward
1176
+ if (isKittyCtrl && lowerChar === 'd') {
1177
+ overlayStateRef.current = deleteForward(current);
1178
+ renderOverlay(overlayStateRef.current);
1179
+ return;
1180
+ }
1181
+
1182
+ // Push-to-talk: Ctrl+R to toggle recording
1183
+ if (isKittyCtrl && lowerChar === 'r') {
1184
+ if (dictationBusyRef.current) return;
1185
+
1186
+ if (isRecording()) {
1187
+ dictationBusyRef.current = true;
1188
+ onDictationStateChange?.('transcribing');
1189
+
1190
+ if (useNative) {
1191
+ stopNativeRecording()
1192
+ .then((text) => {
1193
+ if (text && text.trim()) {
1194
+ // Submit the transcribed text directly
1195
+ onSubmit(text.trim());
1196
+ overlayStateRef.current = { ...overlayStateRef.current, segments: [], cursor: { index: 0, offset: 0 }, historyIdx: -1 };
1197
+ renderOverlay(overlayStateRef.current);
1198
+ } else {
1199
+ onToast?.('No speech detected');
1200
+ }
1201
+ })
1202
+ .catch((err) => {
1203
+ onToast?.(`Dictation error: ${err.message}`);
1204
+ })
1205
+ .finally(() => {
1206
+ dictationBusyRef.current = false;
1207
+ onDictationStateChange?.('idle');
1208
+ });
1209
+ } else {
1210
+ stopLegacyRecording()
1211
+ .then((result) => {
1212
+ if (result.text && result.text.trim()) {
1213
+ // Submit the transcribed text directly
1214
+ onSubmit(result.text.trim());
1215
+ overlayStateRef.current = { ...overlayStateRef.current, segments: [], cursor: { index: 0, offset: 0 }, historyIdx: -1 };
1216
+ renderOverlay(overlayStateRef.current);
1217
+ } else {
1218
+ onToast?.('No speech detected');
1219
+ }
1220
+ })
1221
+ .catch((err) => {
1222
+ onToast?.(`Dictation error: ${err.message}`);
1223
+ })
1224
+ .finally(() => {
1225
+ dictationBusyRef.current = false;
1226
+ onDictationStateChange?.('idle');
1227
+ });
1228
+ }
1229
+ } else {
1230
+ if (!isDictationAvailable()) {
1231
+ onToast?.('Dictation not available. Build native/ztc-audio or install sox');
1232
+ return;
1233
+ }
1234
+ try {
1235
+ if (useNative) {
1236
+ startNativeRecording({ model: 'tiny' });
1237
+ } else {
1238
+ startLegacyRecording();
1239
+ }
1240
+ onDictationStateChange?.('recording');
1241
+ } catch (err) {
1242
+ onToast?.(`Recording error: ${err instanceof Error ? err.message : 'Unknown'}`);
1243
+ }
1244
+ }
1245
+ return;
1246
+ }
1247
+
1248
+ // Meta key combinations
1249
+ if (isKittyMeta && lowerChar === 'y') {
1250
+ overlayStateRef.current = yankPop(current);
1251
+ renderOverlay(overlayStateRef.current);
1252
+ return;
1253
+ }
1254
+ if (isKittyMeta && lowerChar === 't') {
1255
+ overlayStateRef.current = transposeWords(current);
1256
+ renderOverlay(overlayStateRef.current);
1257
+ return;
1258
+ }
1259
+ if (isKittyMeta && lowerChar === 'd') {
1260
+ const { next, killed } = killWordForward(current);
1261
+ pushKill(killed);
1262
+ overlayStateRef.current = next;
1263
+ renderOverlay(overlayStateRef.current);
1264
+ return;
1265
+ }
1266
+ if (isKittyMeta && lowerChar === 'b') {
1267
+ overlayStateRef.current = moveWordLeft(current);
1268
+ renderOverlay(overlayStateRef.current);
1269
+ return;
1270
+ }
1271
+ if (isKittyMeta && lowerChar === 'f') {
1272
+ overlayStateRef.current = moveWordRight(current);
1273
+ renderOverlay(overlayStateRef.current);
1274
+ return;
1275
+ }
1276
+
1277
+ // Unknown Kitty sequence - don't insert as text
1278
+ }, [disabled, historyEnabled, killToEnd, killToStart, killWordBackward, killWordForward, onDictationStateChange, onToast, pushKill, renderOverlay, transposeChars, transposeWords, yank, yankPop]);
791
1279
 
792
1280
  const handleOverlayInput = useCallback((input: string, key: InputKey) => {
793
1281
  if (disabled) return;
1282
+
1283
+ // Handle Kitty keyboard protocol sequences for overlay mode
1284
+ const kittyMatch = input.match(/\x1b?\[(\d+);(\d+)u/);
1285
+ if (kittyMatch) {
1286
+ const keycode = parseInt(kittyMatch[1], 10);
1287
+ const modifier = parseInt(kittyMatch[2], 10);
1288
+ const kittyChar = String.fromCharCode(keycode);
1289
+ handleKittyOverlayInput(kittyChar, key, keycode, modifier);
1290
+ return;
1291
+ }
1292
+
794
1293
  // Detect backspace via explicit key flag or known control codes
795
1294
  const backspaceFallback = input === '\b' || input === '\x7f' || input === '\x08';
796
1295
  const isBackspace = key.backspace || key.delete || backspaceFallback;
@@ -903,28 +1402,28 @@ export const InputArea: React.FC<InputAreaProps> = ({
903
1402
  return;
904
1403
  }
905
1404
 
906
- if (key.ctrl && input === 'a') {
1405
+ if (isCtrl(input, key, 'a')) {
907
1406
  overlayStateRef.current = { ...current, cursor: { index: 0, offset: 0 } };
908
1407
  renderOverlay(overlayStateRef.current);
909
1408
  return;
910
1409
  }
911
1410
 
912
- if (key.ctrl && input === 'e') {
1411
+ if (isCtrl(input, key, 'e')) {
913
1412
  overlayStateRef.current = { ...current, cursor: { index: current.segments.length, offset: 0 } };
914
1413
  renderOverlay(overlayStateRef.current);
915
1414
  return;
916
1415
  }
917
- if (key.ctrl && input === 'b') {
1416
+ if (isCtrl(input, key, 'b')) {
918
1417
  overlayStateRef.current = moveLeft(current);
919
1418
  renderOverlay(overlayStateRef.current);
920
1419
  return;
921
1420
  }
922
- if (key.ctrl && input === 'f') {
1421
+ if (isCtrl(input, key, 'f')) {
923
1422
  overlayStateRef.current = moveRight(current);
924
1423
  renderOverlay(overlayStateRef.current);
925
1424
  return;
926
1425
  }
927
- if (key.ctrl && input === 'p') {
1426
+ if (isCtrl(input, key, 'p')) {
928
1427
  const direction = 'up';
929
1428
  if (historyEnabled && current.history.length > 0) {
930
1429
  let newIdx = current.historyIdx === -1 ? current.history.length - 1 : Math.max(0, current.historyIdx - 1);
@@ -939,7 +1438,7 @@ export const InputArea: React.FC<InputAreaProps> = ({
939
1438
  }
940
1439
  return;
941
1440
  }
942
- if (key.ctrl && input === 'n') {
1441
+ if (isCtrl(input, key, 'n')) {
943
1442
  if (historyEnabled && current.history.length > 0) {
944
1443
  let newIdx = current.historyIdx === -1 ? -1 : current.historyIdx + 1;
945
1444
  if (newIdx >= current.history.length) newIdx = -1;
@@ -966,31 +1465,96 @@ export const InputArea: React.FC<InputAreaProps> = ({
966
1465
  return;
967
1466
  }
968
1467
 
969
- if (key.ctrl && input === 'u') {
1468
+ if (isCtrl(input, key, 'u')) {
970
1469
  const { next, killed } = killToStart(current);
971
1470
  pushKill(killed);
972
1471
  overlayStateRef.current = { ...next, history: current.history, historyIdx: -1 };
973
1472
  renderOverlay(overlayStateRef.current);
974
1473
  return;
975
1474
  }
976
- if (key.ctrl && input === 'k') {
1475
+ if (isCtrl(input, key, 'k')) {
977
1476
  const { next, killed } = killToEnd(current);
978
1477
  pushKill(killed);
979
1478
  overlayStateRef.current = next;
980
1479
  renderOverlay(overlayStateRef.current);
981
1480
  return;
982
1481
  }
983
- if (key.ctrl && input === 'y') {
1482
+ if (isCtrl(input, key, 'y')) {
984
1483
  overlayStateRef.current = yank(current);
985
1484
  renderOverlay(overlayStateRef.current);
986
1485
  return;
987
1486
  }
1487
+ // Push-to-talk: Ctrl+R to toggle recording (overlay mode)
1488
+ if (isCtrl(input, key, 'r')) {
1489
+ if (dictationBusyRef.current) return;
1490
+
1491
+ if (isRecording()) {
1492
+ dictationBusyRef.current = true;
1493
+ onDictationStateChange?.('transcribing');
1494
+
1495
+ if (useNative) {
1496
+ stopNativeRecording()
1497
+ .then((text) => {
1498
+ if (text && text.trim()) {
1499
+ // Submit the transcribed text directly
1500
+ onSubmit(text.trim());
1501
+ overlayStateRef.current = { ...overlayStateRef.current, segments: [], cursor: { index: 0, offset: 0 }, historyIdx: -1 };
1502
+ renderOverlay(overlayStateRef.current);
1503
+ } else {
1504
+ onToast?.('No speech detected');
1505
+ }
1506
+ })
1507
+ .catch((err) => {
1508
+ onToast?.(`Dictation error: ${err.message}`);
1509
+ })
1510
+ .finally(() => {
1511
+ dictationBusyRef.current = false;
1512
+ onDictationStateChange?.('idle');
1513
+ });
1514
+ } else {
1515
+ stopLegacyRecording()
1516
+ .then((result) => {
1517
+ if (result.text && result.text.trim()) {
1518
+ // Submit the transcribed text directly
1519
+ onSubmit(result.text.trim());
1520
+ overlayStateRef.current = { ...overlayStateRef.current, segments: [], cursor: { index: 0, offset: 0 }, historyIdx: -1 };
1521
+ renderOverlay(overlayStateRef.current);
1522
+ } else {
1523
+ onToast?.('No speech detected');
1524
+ }
1525
+ })
1526
+ .catch((err) => {
1527
+ onToast?.(`Dictation error: ${err.message}`);
1528
+ })
1529
+ .finally(() => {
1530
+ dictationBusyRef.current = false;
1531
+ onDictationStateChange?.('idle');
1532
+ });
1533
+ }
1534
+ } else {
1535
+ if (!isDictationAvailable()) {
1536
+ onToast?.('Dictation not available. Build native/ztc-audio or install sox');
1537
+ return;
1538
+ }
1539
+ try {
1540
+ if (useNative) {
1541
+ startNativeRecording({ model: 'tiny' });
1542
+ } else {
1543
+ startLegacyRecording();
1544
+ }
1545
+ onDictationStateChange?.('recording');
1546
+ } catch (err) {
1547
+ onToast?.(`Recording error: ${err instanceof Error ? err.message : 'Unknown'}`);
1548
+ }
1549
+ }
1550
+ return;
1551
+ }
988
1552
  if (key.meta && input === 'y') {
989
1553
  overlayStateRef.current = yankPop(current);
990
1554
  renderOverlay(overlayStateRef.current);
991
1555
  return;
992
1556
  }
993
- if (key.ctrl && input === 't') {
1557
+ if (isCtrl(input, key, 't')) {
994
1558
  overlayStateRef.current = transposeChars(current);
995
1559
  renderOverlay(overlayStateRef.current);
996
1560
  return;
@@ -1000,13 +1564,13 @@ export const InputArea: React.FC<InputAreaProps> = ({
1000
1564
  renderOverlay(overlayStateRef.current);
1001
1565
  return;
1002
1566
  }
1003
- if (key.ctrl && input === 'd') {
1567
+ if (isCtrl(input, key, 'd')) {
1004
1568
  overlayStateRef.current = deleteForward(current);
1005
1569
  renderOverlay(overlayStateRef.current);
1006
1570
  return;
1007
1571
  }
1008
1572
 
1009
- if (key.ctrl && input === 'w') {
1573
+ if (isCtrl(input, key, 'w')) {
1010
1574
  const { next, killed } = killWordBackward(current);
1011
1575
  pushKill(killed);
1012
1576
  overlayStateRef.current = next;
@@ -1052,7 +1616,7 @@ export const InputArea: React.FC<InputAreaProps> = ({
1052
1616
  overlayStateRef.current = { ...overlayStateRef.current, historyIdx: -1 };
1053
1617
  renderOverlay(overlayStateRef.current);
1054
1618
  }
1055
- }, [disabled, historyEnabled, onCommand, onSubmit, renderOverlay]);
1619
+ }, [disabled, handleKittyOverlayInput, historyEnabled, onCommand, onSubmit, renderOverlay]);
1056
1620
 
1057
1621
  useInput((input, key) => {
1058
1622
  if (overlayEnabled) {