npm - @entros/pulse-sdk - Versions diffs - 1.5.1 → 1.5.3 - Mend

@entros/pulse-sdk 1.5.1 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.d.mts CHANGED Viewed

@@ -134,8 +134,8 @@ interface VerificationResult {
      * Server-side safe-reveal (validator → executor → SDK):
      *   - `variance_floor`, `entropy_bounds`, `temporal_coupling_low`,
      *     `phrase_content_mismatch`
-     *   Surfaced for the soft-reject + retry UX (master-list #94) so the
-     *   UI can render a per-category hint.
+     *   Surfaced for the soft-reject + retry UX so the UI can render a
+     *   per-category hint.
      *
      * Client-side (SDK-emitted):
      *   - `validation_unavailable` — the relayer's `/validate-features`
@@ -360,17 +360,17 @@ declare const SPEAKER_FEATURE_COUNT = 44;
 /**
  * Extract speaker-dependent audio features.
  *
- * Captures physiological vocal characteristics (F0, jitter, shimmer, HNR, formant
- * ratios) that are stable across different utterances from the same speaker.
- * Content-independent by design — different phrases produce similar feature values.
+ * Captures physiological vocal characteristics that are stable across
+ * different utterances from the same speaker. Content-independent by
+ * design — different phrases produce similar feature values.
  *
  * Returns 44 values.
  */
 /**
  * Extracts 44 speaker features AND the raw F0 contour.
- * The F0 contour is surfaced so Tier 2 cross-modal temporal analysis can be
- * performed server-side against the motion time-series. Feature vector shape
- * and semantics are unchanged.
+ * The F0 contour is surfaced so server-side analysis can pair it with
+ * the motion time-series. Feature vector shape and semantics are
+ * unchanged.
  */
 declare function extractSpeakerFeaturesDetailed(audio: AudioCapture): Promise<{
     features: number[];
@@ -379,15 +379,15 @@ declare function extractSpeakerFeaturesDetailed(audio: AudioCapture): Promise<{
 /**
  * Extracts 44 speaker features. Backward-compatible wrapper that discards
  * the F0 contour; use `extractSpeakerFeaturesDetailed` when the contour is
- * needed (e.g. for Tier 2 server-side cross-modal analysis).
+ * needed (e.g. for server-side analysis).
  */
 declare function extractSpeakerFeatures(audio: AudioCapture): Promise<number[]>;
 /**
  * Compute per-sample acceleration magnitude |a| = √(ax² + ay² + az²) and
- * linearly resample to a target frame count. Used for Tier 2 cross-modal
- * temporal analysis against the F0 contour; the two time-series must share
- * the same frame count for direct correlation.
+ * linearly resample to a target frame count. Surfaced for server-side
+ * analysis paired against the F0 contour; the two time-series must share
+ * the same frame count when consumed downstream.
  *
  * Returns an empty array if motion data is absent or too short.
  */
@@ -513,9 +513,8 @@ declare function submitViaWallet(proof: SolanaProof, commitment: Uint8Array, opt
  * and sets a 7-day cooldown before the next reset.
  *
  * Transaction shape: single instruction (no challenge / verify_proof /
- * ZK proof required). Humanness evidence comes from the Tier 1
- * validation pipeline invoked at the /attest step (same as mint and
- * update).
+ * ZK proof required). Humanness evidence comes from the validation
+ * pipeline invoked at the /attest step (same as mint and update).
  */
 declare function submitResetViaWallet(commitment: Uint8Array, options: {
     wallet: any;
@@ -659,8 +658,8 @@ declare function loadVerificationData(): Promise<StoredVerificationData | null>;
  * FALLBACK challenge-phrase generator. Used only when the executor's
  * `/challenge` endpoint is unreachable; the authoritative phrase comes from
  * the server (5 real words drawn from a curated English-word dictionary). On
- * this fallback path, validation skips server-side phrase content binding —
- * Tier 1 acoustic + Tier 2 cross-modal still run.
+ * this fallback path, validation skips the phrase verification step —
+ * other server-side checks still run.
  *
  * Output is 5-6 syllable pairs, forming nonsensical but speakable words.
  * Uses crypto.getRandomValues for unpredictable challenge generation.
@@ -711,16 +710,14 @@ declare function generateLissajousSequence(count?: number): {
  *
  * The executor's `/challenge` endpoint returns a fresh nonce + 5-word phrase
  * bound to the wallet for a short TTL (default 60s). The phrase is drawn from
- * a curated English-word dictionary (source of truth at
- * `entros-validation/src/word_dict.rs`); shown to the user as the voice challenge
+ * a curated English-word dictionary, shown to the user as the voice challenge
  * and looked up server-side at `/validate-features` to verify the audio
- * matches the issued phrase (master-list #89, phrase content binding).
+ * matches the issued phrase.
  *
- * Server-issued phrases are the only safe design for content binding: if the
- * client generated the phrase and sent it to the server alongside the audio,
- * an attacker would submit their own phrase matching whatever content they
- * captured. With server issuance, the phrase is bound to the nonce and the
- * client cannot substitute it.
+ * Server-issued phrases are the only safe design here: if the client generated
+ * the phrase and sent it to the server alongside the audio, an attacker would
+ * submit their own phrase matching whatever content they captured. With server
+ * issuance, the phrase is bound to the nonce and the client cannot substitute it.
  */
 /**
  * Server-issued challenge artifacts. Returned by `fetchChallenge`.
@@ -745,13 +742,13 @@ declare function fetchChallenge(executorUrl: string, walletAddress: string, apiK
 /**
  * Encode captured Float32 audio samples as base64 int16 PCM for transmission
- * to the validation service (master-list #89 phrase content binding).
+ * to the validation service.
  *
  * Audio is captured as `Float32Array` with values in `[-1.0, 1.0]` by the
- * Pulse SDK (`sensor/audio.ts`). The validation service's phrase-binding
- * module decodes base64 → Vec<i16> → Vec<f32> before feeding Whisper-tiny.
- * int16 is the standard compact representation: 2 bytes per sample vs 4 for
- * f32, halving wire size without perceptible quality loss for 16kHz speech.
+ * Pulse SDK (`sensor/audio.ts`). The validation service decodes the base64
+ * payload and feeds the audio into server-side transcription. int16 is the
+ * standard compact representation: 2 bytes per sample vs 4 for f32, halving
+ * wire size without perceptible quality loss for 16kHz speech.
  *
  * Byte layout: little-endian int16 samples, contiguous, no header.
  */

package/dist/index.d.ts CHANGED Viewed

@@ -134,8 +134,8 @@ interface VerificationResult {
      * Server-side safe-reveal (validator → executor → SDK):
      *   - `variance_floor`, `entropy_bounds`, `temporal_coupling_low`,
      *     `phrase_content_mismatch`
-     *   Surfaced for the soft-reject + retry UX (master-list #94) so the
-     *   UI can render a per-category hint.
+     *   Surfaced for the soft-reject + retry UX so the UI can render a
+     *   per-category hint.
      *
      * Client-side (SDK-emitted):
      *   - `validation_unavailable` — the relayer's `/validate-features`
@@ -360,17 +360,17 @@ declare const SPEAKER_FEATURE_COUNT = 44;
 /**
  * Extract speaker-dependent audio features.
  *
- * Captures physiological vocal characteristics (F0, jitter, shimmer, HNR, formant
- * ratios) that are stable across different utterances from the same speaker.
- * Content-independent by design — different phrases produce similar feature values.
+ * Captures physiological vocal characteristics that are stable across
+ * different utterances from the same speaker. Content-independent by
+ * design — different phrases produce similar feature values.
  *
  * Returns 44 values.
  */
 /**
  * Extracts 44 speaker features AND the raw F0 contour.
- * The F0 contour is surfaced so Tier 2 cross-modal temporal analysis can be
- * performed server-side against the motion time-series. Feature vector shape
- * and semantics are unchanged.
+ * The F0 contour is surfaced so server-side analysis can pair it with
+ * the motion time-series. Feature vector shape and semantics are
+ * unchanged.
  */
 declare function extractSpeakerFeaturesDetailed(audio: AudioCapture): Promise<{
     features: number[];
@@ -379,15 +379,15 @@ declare function extractSpeakerFeaturesDetailed(audio: AudioCapture): Promise<{
 /**
  * Extracts 44 speaker features. Backward-compatible wrapper that discards
  * the F0 contour; use `extractSpeakerFeaturesDetailed` when the contour is
- * needed (e.g. for Tier 2 server-side cross-modal analysis).
+ * needed (e.g. for server-side analysis).
  */
 declare function extractSpeakerFeatures(audio: AudioCapture): Promise<number[]>;
 /**
  * Compute per-sample acceleration magnitude |a| = √(ax² + ay² + az²) and
- * linearly resample to a target frame count. Used for Tier 2 cross-modal
- * temporal analysis against the F0 contour; the two time-series must share
- * the same frame count for direct correlation.
+ * linearly resample to a target frame count. Surfaced for server-side
+ * analysis paired against the F0 contour; the two time-series must share
+ * the same frame count when consumed downstream.
  *
  * Returns an empty array if motion data is absent or too short.
  */
@@ -513,9 +513,8 @@ declare function submitViaWallet(proof: SolanaProof, commitment: Uint8Array, opt
  * and sets a 7-day cooldown before the next reset.
  *
  * Transaction shape: single instruction (no challenge / verify_proof /
- * ZK proof required). Humanness evidence comes from the Tier 1
- * validation pipeline invoked at the /attest step (same as mint and
- * update).
+ * ZK proof required). Humanness evidence comes from the validation
+ * pipeline invoked at the /attest step (same as mint and update).
  */
 declare function submitResetViaWallet(commitment: Uint8Array, options: {
     wallet: any;
@@ -659,8 +658,8 @@ declare function loadVerificationData(): Promise<StoredVerificationData | null>;
  * FALLBACK challenge-phrase generator. Used only when the executor's
  * `/challenge` endpoint is unreachable; the authoritative phrase comes from
  * the server (5 real words drawn from a curated English-word dictionary). On
- * this fallback path, validation skips server-side phrase content binding —
- * Tier 1 acoustic + Tier 2 cross-modal still run.
+ * this fallback path, validation skips the phrase verification step —
+ * other server-side checks still run.
  *
  * Output is 5-6 syllable pairs, forming nonsensical but speakable words.
  * Uses crypto.getRandomValues for unpredictable challenge generation.
@@ -711,16 +710,14 @@ declare function generateLissajousSequence(count?: number): {
  *
  * The executor's `/challenge` endpoint returns a fresh nonce + 5-word phrase
  * bound to the wallet for a short TTL (default 60s). The phrase is drawn from
- * a curated English-word dictionary (source of truth at
- * `entros-validation/src/word_dict.rs`); shown to the user as the voice challenge
+ * a curated English-word dictionary, shown to the user as the voice challenge
  * and looked up server-side at `/validate-features` to verify the audio
- * matches the issued phrase (master-list #89, phrase content binding).
+ * matches the issued phrase.
  *
- * Server-issued phrases are the only safe design for content binding: if the
- * client generated the phrase and sent it to the server alongside the audio,
- * an attacker would submit their own phrase matching whatever content they
- * captured. With server issuance, the phrase is bound to the nonce and the
- * client cannot substitute it.
+ * Server-issued phrases are the only safe design here: if the client generated
+ * the phrase and sent it to the server alongside the audio, an attacker would
+ * submit their own phrase matching whatever content they captured. With server
+ * issuance, the phrase is bound to the nonce and the client cannot substitute it.
  */
 /**
  * Server-issued challenge artifacts. Returned by `fetchChallenge`.
@@ -745,13 +742,13 @@ declare function fetchChallenge(executorUrl: string, walletAddress: string, apiK
 /**
  * Encode captured Float32 audio samples as base64 int16 PCM for transmission
- * to the validation service (master-list #89 phrase content binding).
+ * to the validation service.
  *
  * Audio is captured as `Float32Array` with values in `[-1.0, 1.0]` by the
- * Pulse SDK (`sensor/audio.ts`). The validation service's phrase-binding
- * module decodes base64 → Vec<i16> → Vec<f32> before feeding Whisper-tiny.
- * int16 is the standard compact representation: 2 bytes per sample vs 4 for
- * f32, halving wire size without perceptible quality loss for 16kHz speech.
+ * Pulse SDK (`sensor/audio.ts`). The validation service decodes the base64
+ * payload and feeds the audio into server-side transcription. int16 is the
+ * standard compact representation: 2 bytes per sample vs 4 for f32, halving
+ * wire size without perceptible quality loss for 16kHz speech.
  *
  * Byte layout: little-endian int16 samples, contiguous, no header.
  */

package/dist/index.js CHANGED Viewed

@@ -150,9 +150,30 @@ async function captureAudio(options = {}) {
     audio: {
       sampleRate: TARGET_SAMPLE_RATE,
       channelCount: 1,
+      // Capture without browser-side audio processing — preserves the
+      // raw microphone signal for the SDK's downstream feature extraction
+      // and for server-side validation. Audio cleanup intended for the
+      // transcription path runs server-side, on a parallel path that
+      // never feeds back to feature extraction. Matches the mobile SDK's
+      // choice of Android's `MIC` source over `VOICE_RECOGNITION` —
+      // same architectural decision, two platforms.
       echoCancellation: false,
       noiseSuppression: false,
-      autoGainControl: false
+      autoGainControl: false,
+      // OS-level voice isolation request (W3C Media Capture Extensions,
+      // 2024). Activates the platform DSP on Chrome 124+ / ChromeOS and
+      // surfaces Apple Voice Isolation Mic Mode on Safari macOS Sonoma+
+      // / iOS 17+ when the user has it enabled in Control Center.
+      // Silently ignored on browsers/OSes without support, so the
+      // constraint costs nothing where it doesn't help. Distinct
+      // mechanism from `noiseSuppression` above — that flag controls
+      // WebRTC's hand-tuned AudioProcessingModule, this requests the
+      // OS-native neural effect.
+      // @ts-expect-error -- W3C Media Capture Extensions property; not
+      // yet in lib.dom.d.ts as of TypeScript 6.0. Once lib.dom catches up,
+      // the then-unused directive itself becomes a compile error,
+      // signaling that it can be deleted.
+      voiceIsolation: true
     }
   });
   let ctx;
@@ -652,6 +673,7 @@ async function getMeyda() {
   }
   return meydaModule.default ?? meydaModule;
 }
+var F0_YIELD_EVERY_N_FRAMES = 16;
 async function detectF0Contour(samples, sampleRate) {
   const detect = await getPitchDetector(sampleRate);
   const frameSize = getFrameSize(sampleRate);
@@ -678,6 +700,9 @@ async function detectF0Contour(samples, sampleRate) {
       sum += (frame[j] ?? 0) * (frame[j] ?? 0);
     }
     amplitudes.push(Math.sqrt(sum / frame.length));
+    if (i > 0 && i < numFrames - 1 && i % F0_YIELD_EVERY_N_FRAMES === 0) {
+      await yieldToMainThread();
+    }
   }
   return { f0, amplitudes, periods };
 }
@@ -4744,9 +4769,16 @@ var PulseSession = class {
       audio: {
         sampleRate: 16e3,
         channelCount: 1,
+        // Capture constraints kept in lock-step with `sensor/audio.ts` —
+        // the two entry points (standalone capture vs session-based
+        // capture) must agree or the verify flow and direct-API
+        // consumers diverge.
         echoCancellation: false,
         noiseSuppression: false,
-        autoGainControl: false
+        autoGainControl: false,
+        // @ts-expect-error -- W3C Media Capture Extensions property; not
+        // yet in lib.dom.d.ts as of TypeScript 6.0.
+        voiceIsolation: true
       }
     });
     this.audioStageState = "capturing";