zerg-ztc 0.1.7 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/dist/App.d.ts.map +1 -1
  2. package/dist/App.js +75 -8
  3. package/dist/App.js.map +1 -1
  4. package/dist/agent/agent.d.ts +2 -0
  5. package/dist/agent/agent.d.ts.map +1 -1
  6. package/dist/agent/agent.js +111 -10
  7. package/dist/agent/agent.js.map +1 -1
  8. package/dist/agent/backends/anthropic.d.ts.map +1 -1
  9. package/dist/agent/backends/anthropic.js +15 -3
  10. package/dist/agent/backends/anthropic.js.map +1 -1
  11. package/dist/agent/backends/gemini.d.ts.map +1 -1
  12. package/dist/agent/backends/gemini.js +12 -0
  13. package/dist/agent/backends/gemini.js.map +1 -1
  14. package/dist/agent/backends/index.d.ts +1 -1
  15. package/dist/agent/backends/index.d.ts.map +1 -1
  16. package/dist/agent/backends/openai_compatible.d.ts.map +1 -1
  17. package/dist/agent/backends/openai_compatible.js +12 -0
  18. package/dist/agent/backends/openai_compatible.js.map +1 -1
  19. package/dist/agent/backends/types.d.ts +21 -1
  20. package/dist/agent/backends/types.d.ts.map +1 -1
  21. package/dist/agent/commands/dictation.d.ts +3 -0
  22. package/dist/agent/commands/dictation.d.ts.map +1 -0
  23. package/dist/agent/commands/dictation.js +10 -0
  24. package/dist/agent/commands/dictation.js.map +1 -0
  25. package/dist/agent/commands/index.d.ts.map +1 -1
  26. package/dist/agent/commands/index.js +2 -1
  27. package/dist/agent/commands/index.js.map +1 -1
  28. package/dist/agent/commands/types.d.ts +7 -0
  29. package/dist/agent/commands/types.d.ts.map +1 -1
  30. package/dist/agent/runtime/capabilities.d.ts +2 -1
  31. package/dist/agent/runtime/capabilities.d.ts.map +1 -1
  32. package/dist/agent/runtime/capabilities.js +1 -0
  33. package/dist/agent/runtime/capabilities.js.map +1 -1
  34. package/dist/agent/tools/index.d.ts +1 -0
  35. package/dist/agent/tools/index.d.ts.map +1 -1
  36. package/dist/agent/tools/index.js +6 -1
  37. package/dist/agent/tools/index.js.map +1 -1
  38. package/dist/agent/tools/screenshot.d.ts +23 -0
  39. package/dist/agent/tools/screenshot.d.ts.map +1 -0
  40. package/dist/agent/tools/screenshot.js +735 -0
  41. package/dist/agent/tools/screenshot.js.map +1 -0
  42. package/dist/components/InputArea.d.ts +1 -0
  43. package/dist/components/InputArea.d.ts.map +1 -1
  44. package/dist/components/InputArea.js +591 -43
  45. package/dist/components/InputArea.js.map +1 -1
  46. package/dist/components/SingleMessage.d.ts.map +1 -1
  47. package/dist/components/SingleMessage.js +157 -7
  48. package/dist/components/SingleMessage.js.map +1 -1
  49. package/dist/config/types.d.ts +6 -0
  50. package/dist/config/types.d.ts.map +1 -1
  51. package/dist/ui/views/status_bar.js +2 -2
  52. package/dist/ui/views/status_bar.js.map +1 -1
  53. package/dist/utils/dictation.d.ts +46 -0
  54. package/dist/utils/dictation.d.ts.map +1 -0
  55. package/dist/utils/dictation.js +409 -0
  56. package/dist/utils/dictation.js.map +1 -0
  57. package/dist/utils/dictation_native.d.ts +51 -0
  58. package/dist/utils/dictation_native.d.ts.map +1 -0
  59. package/dist/utils/dictation_native.js +216 -0
  60. package/dist/utils/dictation_native.js.map +1 -0
  61. package/dist/utils/path_complete.d.ts.map +1 -1
  62. package/dist/utils/path_complete.js +31 -6
  63. package/dist/utils/path_complete.js.map +1 -1
  64. package/dist/utils/path_format.d.ts +20 -0
  65. package/dist/utils/path_format.d.ts.map +1 -0
  66. package/dist/utils/path_format.js +90 -0
  67. package/dist/utils/path_format.js.map +1 -0
  68. package/dist/utils/table.d.ts +38 -0
  69. package/dist/utils/table.d.ts.map +1 -0
  70. package/dist/utils/table.js +133 -0
  71. package/dist/utils/table.js.map +1 -0
  72. package/dist/utils/tool_trace.d.ts +7 -2
  73. package/dist/utils/tool_trace.d.ts.map +1 -1
  74. package/dist/utils/tool_trace.js +156 -51
  75. package/dist/utils/tool_trace.js.map +1 -1
  76. package/package.json +4 -1
  77. package/packages/ztc-dictation/Cargo.toml +43 -0
  78. package/packages/ztc-dictation/README.md +65 -0
  79. package/packages/ztc-dictation/bin/.gitkeep +0 -0
  80. package/packages/ztc-dictation/index.d.ts +16 -0
  81. package/packages/ztc-dictation/index.js +74 -0
  82. package/packages/ztc-dictation/package.json +41 -0
  83. package/packages/ztc-dictation/src/main.rs +430 -0
  84. package/src/App.tsx +110 -7
  85. package/src/agent/agent.ts +116 -11
  86. package/src/agent/backends/anthropic.ts +15 -5
  87. package/src/agent/backends/gemini.ts +12 -0
  88. package/src/agent/backends/index.ts +1 -0
  89. package/src/agent/backends/openai_compatible.ts +12 -0
  90. package/src/agent/backends/types.ts +25 -1
  91. package/src/agent/commands/dictation.ts +11 -0
  92. package/src/agent/commands/index.ts +2 -0
  93. package/src/agent/commands/types.ts +8 -0
  94. package/src/agent/runtime/capabilities.ts +2 -1
  95. package/src/agent/tools/index.ts +6 -1
  96. package/src/agent/tools/screenshot.ts +821 -0
  97. package/src/components/InputArea.tsx +606 -42
  98. package/src/components/SingleMessage.tsx +248 -9
  99. package/src/config/types.ts +7 -0
  100. package/src/ui/views/status_bar.ts +2 -2
  101. package/src/utils/dictation.ts +467 -0
  102. package/src/utils/dictation_native.ts +258 -0
  103. package/src/utils/path_complete.ts +30 -4
  104. package/src/utils/path_format.ts +99 -0
  105. package/src/utils/table.ts +171 -0
  106. package/src/utils/tool_trace.ts +184 -54
@@ -8,7 +8,35 @@ import chalk from 'chalk';
8
8
  import { saveClipboardImage } from '../utils/clipboard_image.js';
9
9
  import { renderImagePreview } from '../utils/image_preview.js';
10
10
  import { completePath } from '../utils/path_complete.js';
11
+ import { isRecording as isLegacyRecording, startRecording as startLegacyRecording, stopRecordingAndTranscribe as stopLegacyRecording, isDictationAvailable as isLegacyDictationAvailable } from '../utils/dictation.js';
12
+ import { isNativeDictationAvailable, isNativeRecording, startNativeRecording, stopNativeRecording } from '../utils/dictation_native.js';
13
// Dictation backend selection. The availability probe runs once, when this
// module is first evaluated; every later call reuses that answer.
const useNative = isNativeDictationAvailable();
// True while the selected backend (native or legacy) reports an active recording.
const isRecording = () => {
    if (useNative) {
        return isNativeRecording();
    }
    return isLegacyRecording();
};
// The native backend counts as available as soon as it was detected above;
// otherwise the legacy backend is asked on each call.
const isDictationAvailable = () => {
    if (useNative) {
        return true;
    }
    return isLegacyDictationAvailable();
};
11
17
  import { createEmptyState, insertText, insertBadge, backspace, deleteForward, moveLeft, moveRight, moveWordLeft, moveWordRight, getPlainText, serializeSegments, PASTE_BADGE_THRESHOLD } from '../ui/core/input_segments.js';
18
/**
 * Report whether a keypress is Ctrl+<letter>.
 *
 * Three encodings of the same chord are recognised:
 *   1. Ink's `key.ctrl` flag together with the plain (lower-case) letter.
 *   2. The traditional control character (Ctrl+A = \x01 ... Ctrl+Z = \x1a).
 *   3. A Kitty keyboard protocol sequence `[<keycode>;5u`, with or without the
 *      leading ESC, where keycode is the letter's char code and 5 is the
 *      modifier value this file treats as Ctrl.
 *
 * @param {string} input  Raw input string delivered for the keypress.
 * @param {{ctrl?: boolean}} key  Key flags object; only `ctrl` is read here.
 * @param {string} letter  Letter to test for; compared case-insensitively.
 * @returns {boolean} true when `input`/`key` encode Ctrl+<letter>.
 */
function isCtrl(input, key, letter) {
    const lowerLetter = letter.toLowerCase();
    // An empty letter has no char code: the arithmetic below would yield NaN,
    // which String.fromCharCode turns into '\x00' and would match a NUL input.
    if (lowerLetter.length === 0)
        return false;
    // Method 1: Ink's key.ctrl flag
    if (key.ctrl && input === lowerLetter)
        return true;
    const asciiCode = lowerLetter.charCodeAt(0); // 'a' -> 97, 'r' -> 114, etc.
    // Method 2: traditional control character. Only a-z map onto \x01-\x1a;
    // for anything else `code - 96` goes negative and wraps to an unrelated character.
    const isAsciiLetter = asciiCode >= 97 && asciiCode <= 122;
    if (isAsciiLetter && input === String.fromCharCode(asciiCode - 96))
        return true;
    // Method 3: Kitty keyboard protocol [<keycode>;5u (fallback, main handling is in handleKittyInput)
    const kittyPattern = `[${asciiCode};5u`;
    return input === kittyPattern || input === `\x1b${kittyPattern}`;
}
12
40
  const initialState = createEmptyState();
13
41
  function reducer(state, action) {
14
42
  switch (action.type) {
@@ -58,12 +86,13 @@ function reducer(state, action) {
58
86
  return state;
59
87
  }
60
88
  }
61
- export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, onToast, cols = process.stdout.columns || 80, inputBus, disabled = false, placeholder = 'Type a message...', historyEnabled = true, debug = false, cwd = process.cwd() }) => {
89
+ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, onToast, onDictationStateChange, cols = process.stdout.columns || 80, inputBus, disabled = false, placeholder = 'Type a message...', historyEnabled = true, debug = false, cwd = process.cwd() }) => {
62
90
  const [state, dispatch] = useReducer(reducer, initialState);
63
91
  const stateRef = React.useRef(state);
64
92
  const [badgePreview, setBadgePreview] = React.useState(null);
65
93
  const killRingRef = React.useRef([]);
66
94
  const killIndexRef = React.useRef(-1);
95
+ const dictationBusyRef = React.useRef(false);
67
96
  // Bracketed paste mode support - buffer paste content between \x1b[200~ and \x1b[201~
68
97
  const pasteBufferRef = React.useRef('');
69
98
  const isPastingRef = React.useRef(false);
@@ -424,6 +453,165 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
424
453
  }, [insertTextIntoOverlay, insertTextIntoState, onToast, renderOverlay]);
425
454
  // Update ref so handleSubmit can access it
426
455
  handleClipboardImagePasteRef.current = handleClipboardImagePaste;
456
// Handle Kitty keyboard protocol sequences directly.
// handleInput calls this after it has parsed a `[<keycode>;<modifier>u` sequence.
//   char     - character derived from the keycode by the caller
//   key      - key flags forwarded by the caller (not read here)
//   keycode  - numeric keycode from the sequence (not read here)
//   modifier - modifier field of the sequence; 5 is treated as Ctrl and
//              9 as Cmd/Super (used for the "meta" bindings below)
// Sequences that match no binding are swallowed so they are never inserted as text.
const handleKittyInput = useCallback((char, key, keycode, modifier) => {
    if (disabled)
        return;
    const lowerChar = char.toLowerCase();
    const apply = (next) => dispatch({ type: 'apply', state: next });
    // Kill helpers return { next, killed }: remember the killed text, then apply.
    const applyKill = ({ next, killed }) => {
        pushKill(killed);
        apply(next);
    };
    // Rejections are not guaranteed to be Error instances.
    const errorText = (err) => (err instanceof Error ? err.message : 'Unknown');
    // Stop the active recording, submit the transcript, and always release the busy flag.
    const stopAndSubmit = async () => {
        dictationBusyRef.current = true;
        try {
            onDictationStateChange?.('transcribing');
            // The native backend resolves to the text itself; the legacy one to an object with `.text`.
            const text = useNative
                ? await stopNativeRecording()
                : (await stopLegacyRecording()).text;
            const trimmed = text ? text.trim() : '';
            if (trimmed) {
                // Submit the transcribed text directly
                onSubmit(trimmed);
            }
            else {
                onToast?.('No speech detected');
            }
        }
        catch (err) {
            onToast?.(`Dictation error: ${errorText(err)}`);
        }
        finally {
            // Runs even if the stop call throws synchronously, so Ctrl+R can never get stuck.
            dictationBusyRef.current = false;
            onDictationStateChange?.('idle');
        }
    };
    // Push-to-talk: Ctrl+R toggles recording.
    const toggleDictation = () => {
        if (dictationBusyRef.current)
            return;
        if (isRecording()) {
            void stopAndSubmit();
            return;
        }
        if (!isDictationAvailable()) {
            onToast?.('Dictation not available. Build native/ztc-audio or install sox');
            return;
        }
        try {
            if (useNative) {
                startNativeRecording({ model: 'tiny' });
            }
            else {
                startLegacyRecording();
            }
            onDictationStateChange?.('recording');
        }
        catch (err) {
            onToast?.(`Recording error: ${errorText(err)}`);
        }
    };
    if (modifier === 5) {
        switch (lowerChar) {
            // Navigation
            case 'a':
                apply({ cursor: { index: 0, offset: 0 } });
                break;
            case 'e':
                apply({ cursor: { index: state.segments.length, offset: 0 } });
                break;
            case 'b':
                apply(moveLeft(state));
                break;
            case 'f':
                apply(moveRight(state));
                break;
            case 'p':
                navigateHistory('up');
                break;
            case 'n':
                navigateHistory('down');
                break;
            // Kill/yank
            case 'u':
                applyKill(killToStart(state));
                break;
            case 'k':
                applyKill(killToEnd(state));
                break;
            case 'y':
                apply(yank(state));
                break;
            case 'w':
                applyKill(killWordBackward(state));
                break;
            // Transpose
            case 't':
                apply(transposeChars(state));
                break;
            // Delete forward
            case 'd':
                apply(deleteForward(state));
                break;
            case 'r':
                toggleDictation();
                break;
            default:
                // Unknown Ctrl sequence - don't insert as text
                break;
        }
        return;
    }
    if (modifier === 9) {
        // Meta key combinations
        switch (lowerChar) {
            case 'y':
                apply(yankPop(state));
                break;
            case 't':
                apply(transposeWords(state));
                break;
            case 'd':
                applyKill(killWordForward(state));
                break;
            case 'b':
                apply(moveWordLeft(state));
                break;
            case 'f':
                apply(moveWordRight(state));
                break;
            default:
                break;
        }
    }
    // Any other modifier: unknown Kitty sequence - don't insert as text
}, [disabled, killToEnd, killToStart, killWordBackward, killWordForward, navigateHistory, onDictationStateChange, onSubmit, onToast, pushKill, state, transposeChars, transposeWords, yank, yankPop]);
427
615
  const handleInput = useCallback((input, key) => {
428
616
  // Detect Kitty keyboard protocol CSI u sequences
429
617
  // Format: ESC [ <keycode> ; <modifiers> u
@@ -435,23 +623,33 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
435
623
  void handleClipboardImagePaste('state');
436
624
  return;
437
625
  }
438
- // Consume any other Kitty sequences to prevent them from being displayed
439
- // Match pattern: ESC? [ number ; number u
440
- if (/\x1b?\[\d+;\d+u/.test(input)) {
441
- // This is a Kitty keyboard sequence - don't display it as text
442
- // Extract what key it is and handle accordingly
443
- const match = input.match(/\x1b?\[(\d+);(\d+)u/);
444
- if (match) {
445
- const keycode = parseInt(match[1], 10);
446
- const modifier = parseInt(match[2], 10);
447
- // Ctrl+C (99;5) or Cmd+C (99;9) - exit the app
448
- if (keycode === 99 && (modifier === 5 || modifier === 9)) {
449
- exit();
450
- return;
451
- }
452
- // Ctrl+L (108;5) - could add clear screen here if needed
626
+ // Handle Kitty keyboard protocol sequences
627
+ // Format: ESC? [ keycode ; modifier u
628
+ // Modifier 5 = Ctrl, modifier 9 = Cmd/Super
629
+ const kittyMatch = input.match(/\x1b?\[(\d+);(\d+)u/);
630
+ if (kittyMatch) {
631
+ const keycode = parseInt(kittyMatch[1], 10);
632
+ const modifier = parseInt(kittyMatch[2], 10);
633
+ const isKittyCtrl = modifier === 5;
634
+ const isKittyMeta = modifier === 9;
635
+ // Ctrl+C (99;5) or Cmd+C (99;9) - exit the app
636
+ if (keycode === 99 && (isKittyCtrl || isKittyMeta)) {
637
+ exit();
638
+ return;
453
639
  }
454
- return; // Consume other Kitty sequences
640
+ // Synthesize key flags for Kitty sequences so downstream handlers work
641
+ const kittyKey = {
642
+ ...key,
643
+ ctrl: isKittyCtrl || key.ctrl,
644
+ meta: isKittyMeta || key.meta,
645
+ };
646
+ // Convert keycode to character for isCtrl checks
647
+ const kittyChar = String.fromCharCode(keycode);
648
+ // Route to handlers based on keycode
649
+ // Let the normal handler flow process this with synthesized key flags
650
+ // by falling through with modified key/input
651
+ handleKittyInput(kittyChar, kittyKey, keycode, modifier);
652
+ return;
455
653
  }
456
654
  if (disabled)
457
655
  return;
@@ -601,27 +799,27 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
601
799
  }
602
800
  return;
603
801
  }
604
- if (key.ctrl && input === 'a') {
802
+ if (isCtrl(input, key, 'a')) {
605
803
  dispatch({ type: 'apply', state: { cursor: { index: 0, offset: 0 } } });
606
804
  return;
607
805
  }
608
- if (key.ctrl && input === 'e') {
806
+ if (isCtrl(input, key, 'e')) {
609
807
  dispatch({ type: 'apply', state: { cursor: { index: state.segments.length, offset: 0 } } });
610
808
  return;
611
809
  }
612
- if (key.ctrl && input === 'b') {
810
+ if (isCtrl(input, key, 'b')) {
613
811
  dispatch({ type: 'apply', state: moveLeft(state) });
614
812
  return;
615
813
  }
616
- if (key.ctrl && input === 'f') {
814
+ if (isCtrl(input, key, 'f')) {
617
815
  dispatch({ type: 'apply', state: moveRight(state) });
618
816
  return;
619
817
  }
620
- if (key.ctrl && input === 'p') {
818
+ if (isCtrl(input, key, 'p')) {
621
819
  navigateHistory('up');
622
820
  return;
623
821
  }
624
- if (key.ctrl && input === 'n') {
822
+ if (isCtrl(input, key, 'n')) {
625
823
  navigateHistory('down');
626
824
  return;
627
825
  }
@@ -631,19 +829,19 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
631
829
  dispatch({ type: 'apply', state: next });
632
830
  return;
633
831
  }
634
- if (key.ctrl && input === 'u') {
832
+ if (isCtrl(input, key, 'u')) {
635
833
  const { next, killed } = killToStart(state);
636
834
  pushKill(killed);
637
835
  dispatch({ type: 'apply', state: next });
638
836
  return;
639
837
  }
640
- if (key.ctrl && input === 'k') {
838
+ if (isCtrl(input, key, 'k')) {
641
839
  const { next, killed } = killToEnd(state);
642
840
  pushKill(killed);
643
841
  dispatch({ type: 'apply', state: next });
644
842
  return;
645
843
  }
646
- if (key.ctrl && input === 'y') {
844
+ if (isCtrl(input, key, 'y')) {
647
845
  dispatch({ type: 'apply', state: yank(state) });
648
846
  return;
649
847
  }
@@ -651,7 +849,75 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
651
849
  dispatch({ type: 'apply', state: yankPop(state) });
652
850
  return;
653
851
  }
654
- if (key.ctrl && input === 't') {
852
+ // Push-to-talk: Ctrl+R to toggle recording
853
+ if (isCtrl(input, key, 'r')) {
854
+ if (dictationBusyRef.current)
855
+ return;
856
+ if (isRecording()) {
857
+ // Stop recording and transcribe
858
+ dictationBusyRef.current = true;
859
+ onDictationStateChange?.('transcribing');
860
+ if (useNative) {
861
+ stopNativeRecording()
862
+ .then((text) => {
863
+ if (text && text.trim()) {
864
+ // Submit the transcribed text directly
865
+ onSubmit(text.trim());
866
+ }
867
+ else {
868
+ onToast?.('No speech detected');
869
+ }
870
+ })
871
+ .catch((err) => {
872
+ onToast?.(`Dictation error: ${err.message}`);
873
+ })
874
+ .finally(() => {
875
+ dictationBusyRef.current = false;
876
+ onDictationStateChange?.('idle');
877
+ });
878
+ }
879
+ else {
880
+ stopLegacyRecording()
881
+ .then((result) => {
882
+ if (result.text && result.text.trim()) {
883
+ // Submit the transcribed text directly
884
+ onSubmit(result.text.trim());
885
+ }
886
+ else {
887
+ onToast?.('No speech detected');
888
+ }
889
+ })
890
+ .catch((err) => {
891
+ onToast?.(`Dictation error: ${err.message}`);
892
+ })
893
+ .finally(() => {
894
+ dictationBusyRef.current = false;
895
+ onDictationStateChange?.('idle');
896
+ });
897
+ }
898
+ }
899
+ else {
900
+ // Start recording
901
+ if (!isDictationAvailable()) {
902
+ onToast?.('Dictation not available. Build native/ztc-audio or install sox');
903
+ return;
904
+ }
905
+ try {
906
+ if (useNative) {
907
+ startNativeRecording({ model: 'tiny' });
908
+ }
909
+ else {
910
+ startLegacyRecording();
911
+ }
912
+ onDictationStateChange?.('recording');
913
+ }
914
+ catch (err) {
915
+ onToast?.(`Recording error: ${err instanceof Error ? err.message : 'Unknown'}`);
916
+ }
917
+ }
918
+ return;
919
+ }
920
+ if (isCtrl(input, key, 't')) {
655
921
  dispatch({ type: 'apply', state: transposeChars(state) });
656
922
  return;
657
923
  }
@@ -659,11 +925,11 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
659
925
  dispatch({ type: 'apply', state: transposeWords(state) });
660
926
  return;
661
927
  }
662
- if (key.ctrl && input === 'd') {
928
+ if (isCtrl(input, key, 'd')) {
663
929
  dispatch({ type: 'apply', state: deleteForward(state) });
664
930
  return;
665
931
  }
666
- if (key.ctrl && input === 'w') {
932
+ if (isCtrl(input, key, 'w')) {
667
933
  const { next, killed } = killWordBackward(state);
668
934
  pushKill(killed);
669
935
  dispatch({ type: 'apply', state: next });
@@ -701,10 +967,222 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
701
967
  const next = insertText(state, input);
702
968
  dispatch({ type: 'apply', state: { ...next, historyIdx: -1 } });
703
969
  }
704
- }, [disabled, exit, handleClipboardImagePaste, handleSubmit, navigateHistory, state]);
970
+ }, [disabled, exit, handleClipboardImagePaste, handleKittyInput, handleSubmit, navigateHistory, state]);
971
// Handle Kitty keyboard protocol sequences for overlay mode.
// handleOverlayInput calls this after parsing a `[<keycode>;<modifier>u` sequence.
// Unlike the non-overlay handler, edits are written to overlayStateRef and
// redrawn through renderOverlay rather than dispatched to the reducer.
//   char     - character derived from the keycode by the caller
//   key      - key flags forwarded by the caller (not read here)
//   keycode  - numeric keycode from the sequence (not read here)
//   modifier - modifier field of the sequence; 5 is treated as Ctrl and
//              9 as Cmd/Super (used for the "meta" bindings below)
// Sequences that match no binding are swallowed so they are never inserted as text.
const handleKittyOverlayInput = useCallback((char, key, keycode, modifier) => {
    if (disabled)
        return;
    const current = overlayStateRef.current;
    const lowerChar = char.toLowerCase();
    // Store the new overlay state and redraw it.
    const commit = (next) => {
        overlayStateRef.current = next;
        renderOverlay(next);
    };
    // Kill helpers return { next, killed }: remember the killed text, then commit.
    const commitKill = ({ next, killed }) => {
        pushKill(killed);
        commit(next);
    };
    // Replace the overlay contents with history entry `idx`, cursor at the end of the text.
    const showHistoryEntry = (idx) => {
        const historyValue = current.history[idx] || '';
        commit({
            ...current,
            historyIdx: idx,
            segments: historyValue.length > 0 ? [{ type: 'text', text: historyValue }] : [],
            cursor: { index: 0, offset: historyValue.length }
        });
    };
    // Rejections are not guaranteed to be Error instances.
    const errorText = (err) => (err instanceof Error ? err.message : 'Unknown');
    // Stop the active recording, submit the transcript, and always release the busy flag.
    const stopAndSubmit = async () => {
        dictationBusyRef.current = true;
        try {
            onDictationStateChange?.('transcribing');
            // The native backend resolves to the text itself; the legacy one to an object with `.text`.
            const text = useNative
                ? await stopNativeRecording()
                : (await stopLegacyRecording()).text;
            const trimmed = text ? text.trim() : '';
            if (trimmed) {
                // Submit the transcribed text directly
                onSubmit(trimmed);
                // Clear the overlay; read the ref again because it may have changed while transcribing.
                commit({ ...overlayStateRef.current, segments: [], cursor: { index: 0, offset: 0 }, historyIdx: -1 });
            }
            else {
                onToast?.('No speech detected');
            }
        }
        catch (err) {
            onToast?.(`Dictation error: ${errorText(err)}`);
        }
        finally {
            // Runs even if the stop call throws synchronously, so Ctrl+R can never get stuck.
            dictationBusyRef.current = false;
            onDictationStateChange?.('idle');
        }
    };
    // Push-to-talk: Ctrl+R toggles recording.
    const toggleDictation = () => {
        if (dictationBusyRef.current)
            return;
        if (isRecording()) {
            void stopAndSubmit();
            return;
        }
        if (!isDictationAvailable()) {
            onToast?.('Dictation not available. Build native/ztc-audio or install sox');
            return;
        }
        try {
            if (useNative) {
                startNativeRecording({ model: 'tiny' });
            }
            else {
                startLegacyRecording();
            }
            onDictationStateChange?.('recording');
        }
        catch (err) {
            onToast?.(`Recording error: ${errorText(err)}`);
        }
    };
    if (modifier === 5) {
        switch (lowerChar) {
            // Navigation
            case 'a':
                commit({ ...current, cursor: { index: 0, offset: 0 } });
                break;
            case 'e':
                commit({ ...current, cursor: { index: current.segments.length, offset: 0 } });
                break;
            case 'b':
                commit(moveLeft(current));
                break;
            case 'f':
                commit(moveRight(current));
                break;
            case 'p':
                if (historyEnabled && current.history.length > 0) {
                    // Step back one entry, starting from the newest and stopping at the oldest.
                    const newIdx = current.historyIdx === -1
                        ? current.history.length - 1
                        : Math.max(0, current.historyIdx - 1);
                    showHistoryEntry(newIdx);
                }
                break;
            case 'n':
                if (historyEnabled && current.history.length > 0) {
                    let newIdx = current.historyIdx === -1 ? -1 : current.historyIdx + 1;
                    if (newIdx >= current.history.length)
                        newIdx = -1;
                    if (newIdx === -1) {
                        // Past the newest entry (or not browsing): back to an empty input.
                        commit({ ...createEmptyState(), history: current.history, historyIdx: -1 });
                    }
                    else {
                        showHistoryEntry(newIdx);
                    }
                }
                break;
            // Kill/yank
            case 'u': {
                const { next, killed } = killToStart(current);
                commitKill({ next: { ...next, history: current.history, historyIdx: -1 }, killed });
                break;
            }
            case 'k':
                commitKill(killToEnd(current));
                break;
            case 'y':
                commit(yank(current));
                break;
            case 'w':
                commitKill(killWordBackward(current));
                break;
            // Transpose
            case 't':
                commit(transposeChars(current));
                break;
            // Delete forward
            case 'd':
                commit(deleteForward(current));
                break;
            case 'r':
                toggleDictation();
                break;
            default:
                // Unknown Ctrl sequence - don't insert as text
                break;
        }
        return;
    }
    if (modifier === 9) {
        // Meta key combinations
        switch (lowerChar) {
            case 'y':
                commit(yankPop(current));
                break;
            case 't':
                commit(transposeWords(current));
                break;
            case 'd':
                commitKill(killWordForward(current));
                break;
            case 'b':
                commit(moveWordLeft(current));
                break;
            case 'f':
                commit(moveWordRight(current));
                break;
            default:
                break;
        }
    }
    // Any other modifier: unknown Kitty sequence - don't insert as text
}, [disabled, historyEnabled, killToEnd, killToStart, killWordBackward, killWordForward, onDictationStateChange, onSubmit, onToast, pushKill, renderOverlay, transposeChars, transposeWords, yank, yankPop]);
705
1174
  const handleOverlayInput = useCallback((input, key) => {
706
1175
  if (disabled)
707
1176
  return;
1177
+ // Handle Kitty keyboard protocol sequences for overlay mode
1178
+ const kittyMatch = input.match(/\x1b?\[(\d+);(\d+)u/);
1179
+ if (kittyMatch) {
1180
+ const keycode = parseInt(kittyMatch[1], 10);
1181
+ const modifier = parseInt(kittyMatch[2], 10);
1182
+ const kittyChar = String.fromCharCode(keycode);
1183
+ handleKittyOverlayInput(kittyChar, key, keycode, modifier);
1184
+ return;
1185
+ }
708
1186
  // Detect backspace via explicit key flag or known control codes
709
1187
  const backspaceFallback = input === '\b' || input === '\x7f' || input === '\x08';
710
1188
  const isBackspace = key.backspace || key.delete || backspaceFallback;
@@ -810,27 +1288,27 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
810
1288
  renderOverlay(overlayStateRef.current);
811
1289
  return;
812
1290
  }
813
- if (key.ctrl && input === 'a') {
1291
+ if (isCtrl(input, key, 'a')) {
814
1292
  overlayStateRef.current = { ...current, cursor: { index: 0, offset: 0 } };
815
1293
  renderOverlay(overlayStateRef.current);
816
1294
  return;
817
1295
  }
818
- if (key.ctrl && input === 'e') {
1296
+ if (isCtrl(input, key, 'e')) {
819
1297
  overlayStateRef.current = { ...current, cursor: { index: current.segments.length, offset: 0 } };
820
1298
  renderOverlay(overlayStateRef.current);
821
1299
  return;
822
1300
  }
823
- if (key.ctrl && input === 'b') {
1301
+ if (isCtrl(input, key, 'b')) {
824
1302
  overlayStateRef.current = moveLeft(current);
825
1303
  renderOverlay(overlayStateRef.current);
826
1304
  return;
827
1305
  }
828
- if (key.ctrl && input === 'f') {
1306
+ if (isCtrl(input, key, 'f')) {
829
1307
  overlayStateRef.current = moveRight(current);
830
1308
  renderOverlay(overlayStateRef.current);
831
1309
  return;
832
1310
  }
833
- if (key.ctrl && input === 'p') {
1311
+ if (isCtrl(input, key, 'p')) {
834
1312
  const direction = 'up';
835
1313
  if (historyEnabled && current.history.length > 0) {
836
1314
  let newIdx = current.historyIdx === -1 ? current.history.length - 1 : Math.max(0, current.historyIdx - 1);
@@ -845,7 +1323,7 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
845
1323
  }
846
1324
  return;
847
1325
  }
848
- if (key.ctrl && input === 'n') {
1326
+ if (isCtrl(input, key, 'n')) {
849
1327
  if (historyEnabled && current.history.length > 0) {
850
1328
  let newIdx = current.historyIdx === -1 ? -1 : current.historyIdx + 1;
851
1329
  if (newIdx >= current.history.length)
@@ -872,31 +1350,101 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
872
1350
  renderOverlay(overlayStateRef.current);
873
1351
  return;
874
1352
  }
875
- if (key.ctrl && input === 'u') {
1353
+ if (isCtrl(input, key, 'u')) {
876
1354
  const { next, killed } = killToStart(current);
877
1355
  pushKill(killed);
878
1356
  overlayStateRef.current = { ...next, history: current.history, historyIdx: -1 };
879
1357
  renderOverlay(overlayStateRef.current);
880
1358
  return;
881
1359
  }
882
- if (key.ctrl && input === 'k') {
1360
+ if (isCtrl(input, key, 'k')) {
883
1361
  const { next, killed } = killToEnd(current);
884
1362
  pushKill(killed);
885
1363
  overlayStateRef.current = next;
886
1364
  renderOverlay(overlayStateRef.current);
887
1365
  return;
888
1366
  }
889
- if (key.ctrl && input === 'y') {
1367
+ if (isCtrl(input, key, 'y')) {
890
1368
  overlayStateRef.current = yank(current);
891
1369
  renderOverlay(overlayStateRef.current);
892
1370
  return;
893
1371
  }
1372
+ // Push-to-talk: Ctrl+R to toggle recording (overlay mode)
1373
+ if (isCtrl(input, key, 'r')) {
1374
+ if (dictationBusyRef.current)
1375
+ return;
1376
+ if (isRecording()) {
1377
+ dictationBusyRef.current = true;
1378
+ onDictationStateChange?.('transcribing');
1379
+ if (useNative) {
1380
+ stopNativeRecording()
1381
+ .then((text) => {
1382
+ if (text && text.trim()) {
1383
+ // Submit the transcribed text directly
1384
+ onSubmit(text.trim());
1385
+ overlayStateRef.current = { ...overlayStateRef.current, segments: [], cursor: { index: 0, offset: 0 }, historyIdx: -1 };
1386
+ renderOverlay(overlayStateRef.current);
1387
+ }
1388
+ else {
1389
+ onToast?.('No speech detected');
1390
+ }
1391
+ })
1392
+ .catch((err) => {
1393
+ onToast?.(`Dictation error: ${err.message}`);
1394
+ })
1395
+ .finally(() => {
1396
+ dictationBusyRef.current = false;
1397
+ onDictationStateChange?.('idle');
1398
+ });
1399
+ }
1400
+ else {
1401
+ stopLegacyRecording()
1402
+ .then((result) => {
1403
+ if (result.text && result.text.trim()) {
1404
+ // Submit the transcribed text directly
1405
+ onSubmit(result.text.trim());
1406
+ overlayStateRef.current = { ...overlayStateRef.current, segments: [], cursor: { index: 0, offset: 0 }, historyIdx: -1 };
1407
+ renderOverlay(overlayStateRef.current);
1408
+ }
1409
+ else {
1410
+ onToast?.('No speech detected');
1411
+ }
1412
+ })
1413
+ .catch((err) => {
1414
+ onToast?.(`Dictation error: ${err.message}`);
1415
+ })
1416
+ .finally(() => {
1417
+ dictationBusyRef.current = false;
1418
+ onDictationStateChange?.('idle');
1419
+ });
1420
+ }
1421
+ }
1422
+ else {
1423
+ if (!isDictationAvailable()) {
1424
+ onToast?.('Dictation not available. Build native/ztc-audio or install sox');
1425
+ return;
1426
+ }
1427
+ try {
1428
+ if (useNative) {
1429
+ startNativeRecording({ model: 'tiny' });
1430
+ }
1431
+ else {
1432
+ startLegacyRecording();
1433
+ }
1434
+ onDictationStateChange?.('recording');
1435
+ }
1436
+ catch (err) {
1437
+ onToast?.(`Recording error: ${err instanceof Error ? err.message : 'Unknown'}`);
1438
+ }
1439
+ }
1440
+ return;
1441
+ }
894
1442
  if (key.meta && input === 'y') {
895
1443
  overlayStateRef.current = yankPop(current);
896
1444
  renderOverlay(overlayStateRef.current);
897
1445
  return;
898
1446
  }
899
- if (key.ctrl && input === 't') {
1447
+ if (isCtrl(input, key, 't')) {
900
1448
  overlayStateRef.current = transposeChars(current);
901
1449
  renderOverlay(overlayStateRef.current);
902
1450
  return;
@@ -906,12 +1454,12 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
906
1454
  renderOverlay(overlayStateRef.current);
907
1455
  return;
908
1456
  }
909
- if (key.ctrl && input === 'd') {
1457
+ if (isCtrl(input, key, 'd')) {
910
1458
  overlayStateRef.current = deleteForward(current);
911
1459
  renderOverlay(overlayStateRef.current);
912
1460
  return;
913
1461
  }
914
- if (key.ctrl && input === 'w') {
1462
+ if (isCtrl(input, key, 'w')) {
915
1463
  const { next, killed } = killWordBackward(current);
916
1464
  pushKill(killed);
917
1465
  overlayStateRef.current = next;
@@ -958,7 +1506,7 @@ export const InputArea = ({ onSubmit, onCommand, commands = [], onStateChange, o
958
1506
  overlayStateRef.current = { ...overlayStateRef.current, historyIdx: -1 };
959
1507
  renderOverlay(overlayStateRef.current);
960
1508
  }
961
- }, [disabled, historyEnabled, onCommand, onSubmit, renderOverlay]);
1509
+ }, [disabled, handleKittyOverlayInput, historyEnabled, onCommand, onSubmit, renderOverlay]);
962
1510
  useInput((input, key) => {
963
1511
  if (overlayEnabled) {
964
1512
  handleOverlayInput(input, key);