@entros/pulse-sdk 1.5.1 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +27 -30
- package/dist/index.d.ts +27 -30
- package/dist/index.js +34 -2
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +34 -2
- package/dist/index.mjs.map +1 -1
- package/package.json +4 -1
package/dist/index.d.mts
CHANGED
|
@@ -134,8 +134,8 @@ interface VerificationResult {
|
|
|
134
134
|
* Server-side safe-reveal (validator → executor → SDK):
|
|
135
135
|
* - `variance_floor`, `entropy_bounds`, `temporal_coupling_low`,
|
|
136
136
|
* `phrase_content_mismatch`
|
|
137
|
-
* Surfaced for the soft-reject + retry UX
|
|
138
|
-
*
|
|
137
|
+
* Surfaced for the soft-reject + retry UX so the UI can render a
|
|
138
|
+
* per-category hint.
|
|
139
139
|
*
|
|
140
140
|
* Client-side (SDK-emitted):
|
|
141
141
|
* - `validation_unavailable` — the relayer's `/validate-features`
|
|
@@ -360,17 +360,17 @@ declare const SPEAKER_FEATURE_COUNT = 44;
|
|
|
360
360
|
/**
|
|
361
361
|
* Extract speaker-dependent audio features.
|
|
362
362
|
*
|
|
363
|
-
* Captures physiological vocal characteristics
|
|
364
|
-
*
|
|
365
|
-
*
|
|
363
|
+
* Captures physiological vocal characteristics that are stable across
|
|
364
|
+
* different utterances from the same speaker. Content-independent by
|
|
365
|
+
* design — different phrases produce similar feature values.
|
|
366
366
|
*
|
|
367
367
|
* Returns 44 values.
|
|
368
368
|
*/
|
|
369
369
|
/**
|
|
370
370
|
* Extracts 44 speaker features AND the raw F0 contour.
|
|
371
|
-
* The F0 contour is surfaced so
|
|
372
|
-
*
|
|
373
|
-
*
|
|
371
|
+
* The F0 contour is surfaced so server-side analysis can pair it with
|
|
372
|
+
* the motion time-series. Feature vector shape and semantics are
|
|
373
|
+
* unchanged.
|
|
374
374
|
*/
|
|
375
375
|
declare function extractSpeakerFeaturesDetailed(audio: AudioCapture): Promise<{
|
|
376
376
|
features: number[];
|
|
@@ -379,15 +379,15 @@ declare function extractSpeakerFeaturesDetailed(audio: AudioCapture): Promise<{
|
|
|
379
379
|
/**
|
|
380
380
|
* Extracts 44 speaker features. Backward-compatible wrapper that discards
|
|
381
381
|
* the F0 contour; use `extractSpeakerFeaturesDetailed` when the contour is
|
|
382
|
-
* needed (e.g. for
|
|
382
|
+
* needed (e.g. for server-side analysis).
|
|
383
383
|
*/
|
|
384
384
|
declare function extractSpeakerFeatures(audio: AudioCapture): Promise<number[]>;
|
|
385
385
|
|
|
386
386
|
/**
|
|
387
387
|
* Compute per-sample acceleration magnitude |a| = √(ax² + ay² + az²) and
|
|
388
|
-
* linearly resample to a target frame count.
|
|
389
|
-
*
|
|
390
|
-
* the same frame count
|
|
388
|
+
* linearly resample to a target frame count. Surfaced for server-side
|
|
389
|
+
* analysis paired against the F0 contour; the two time-series must share
|
|
390
|
+
* the same frame count when consumed downstream.
|
|
391
391
|
*
|
|
392
392
|
* Returns an empty array if motion data is absent or too short.
|
|
393
393
|
*/
|
|
@@ -513,9 +513,8 @@ declare function submitViaWallet(proof: SolanaProof, commitment: Uint8Array, opt
|
|
|
513
513
|
* and sets a 7-day cooldown before the next reset.
|
|
514
514
|
*
|
|
515
515
|
* Transaction shape: single instruction (no challenge / verify_proof /
|
|
516
|
-
* ZK proof required). Humanness evidence comes from the
|
|
517
|
-
*
|
|
518
|
-
* update).
|
|
516
|
+
* ZK proof required). Humanness evidence comes from the validation
|
|
517
|
+
* pipeline invoked at the /attest step (same as mint and update).
|
|
519
518
|
*/
|
|
520
519
|
declare function submitResetViaWallet(commitment: Uint8Array, options: {
|
|
521
520
|
wallet: any;
|
|
@@ -659,8 +658,8 @@ declare function loadVerificationData(): Promise<StoredVerificationData | null>;
|
|
|
659
658
|
* FALLBACK challenge-phrase generator. Used only when the executor's
|
|
660
659
|
* `/challenge` endpoint is unreachable; the authoritative phrase comes from
|
|
661
660
|
* the server (5 real words drawn from a curated English-word dictionary). On
|
|
662
|
-
* this fallback path, validation skips
|
|
663
|
-
*
|
|
661
|
+
* this fallback path, validation skips the phrase verification step —
|
|
662
|
+
* other server-side checks still run.
|
|
664
663
|
*
|
|
665
664
|
* Output is 5-6 syllable pairs, forming nonsensical but speakable words.
|
|
666
665
|
* Uses crypto.getRandomValues for unpredictable challenge generation.
|
|
@@ -711,16 +710,14 @@ declare function generateLissajousSequence(count?: number): {
|
|
|
711
710
|
*
|
|
712
711
|
* The executor's `/challenge` endpoint returns a fresh nonce + 5-word phrase
|
|
713
712
|
* bound to the wallet for a short TTL (default 60s). The phrase is drawn from
|
|
714
|
-
* a curated English-word dictionary
|
|
715
|
-
* `entros-validation/src/word_dict.rs`); shown to the user as the voice challenge
|
|
713
|
+
* a curated English-word dictionary, shown to the user as the voice challenge
|
|
716
714
|
* and looked up server-side at `/validate-features` to verify the audio
|
|
717
|
-
* matches the issued phrase
|
|
715
|
+
* matches the issued phrase.
|
|
718
716
|
*
|
|
719
|
-
* Server-issued phrases are the only safe design
|
|
720
|
-
*
|
|
721
|
-
*
|
|
722
|
-
*
|
|
723
|
-
* client cannot substitute it.
|
|
717
|
+
* Server-issued phrases are the only safe design here: if the client generated
|
|
718
|
+
* the phrase and sent it to the server alongside the audio, an attacker would
|
|
719
|
+
* submit their own phrase matching whatever content they captured. With server
|
|
720
|
+
* issuance, the phrase is bound to the nonce and the client cannot substitute it.
|
|
724
721
|
*/
|
|
725
722
|
/**
|
|
726
723
|
* Server-issued challenge artifacts. Returned by `fetchChallenge`.
|
|
@@ -745,13 +742,13 @@ declare function fetchChallenge(executorUrl: string, walletAddress: string, apiK
|
|
|
745
742
|
|
|
746
743
|
/**
|
|
747
744
|
* Encode captured Float32 audio samples as base64 int16 PCM for transmission
|
|
748
|
-
* to the validation service
|
|
745
|
+
* to the validation service.
|
|
749
746
|
*
|
|
750
747
|
* Audio is captured as `Float32Array` with values in `[-1.0, 1.0]` by the
|
|
751
|
-
* Pulse SDK (`sensor/audio.ts`). The validation service
|
|
752
|
-
*
|
|
753
|
-
*
|
|
754
|
-
*
|
|
748
|
+
* Pulse SDK (`sensor/audio.ts`). The validation service decodes the base64
|
|
749
|
+
* payload and feeds the audio into server-side transcription. int16 is the
|
|
750
|
+
* standard compact representation: 2 bytes per sample vs 4 for f32, halving
|
|
751
|
+
* wire size without perceptible quality loss for 16kHz speech.
|
|
755
752
|
*
|
|
756
753
|
* Byte layout: little-endian int16 samples, contiguous, no header.
|
|
757
754
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -134,8 +134,8 @@ interface VerificationResult {
|
|
|
134
134
|
* Server-side safe-reveal (validator → executor → SDK):
|
|
135
135
|
* - `variance_floor`, `entropy_bounds`, `temporal_coupling_low`,
|
|
136
136
|
* `phrase_content_mismatch`
|
|
137
|
-
* Surfaced for the soft-reject + retry UX
|
|
138
|
-
*
|
|
137
|
+
* Surfaced for the soft-reject + retry UX so the UI can render a
|
|
138
|
+
* per-category hint.
|
|
139
139
|
*
|
|
140
140
|
* Client-side (SDK-emitted):
|
|
141
141
|
* - `validation_unavailable` — the relayer's `/validate-features`
|
|
@@ -360,17 +360,17 @@ declare const SPEAKER_FEATURE_COUNT = 44;
|
|
|
360
360
|
/**
|
|
361
361
|
* Extract speaker-dependent audio features.
|
|
362
362
|
*
|
|
363
|
-
* Captures physiological vocal characteristics
|
|
364
|
-
*
|
|
365
|
-
*
|
|
363
|
+
* Captures physiological vocal characteristics that are stable across
|
|
364
|
+
* different utterances from the same speaker. Content-independent by
|
|
365
|
+
* design — different phrases produce similar feature values.
|
|
366
366
|
*
|
|
367
367
|
* Returns 44 values.
|
|
368
368
|
*/
|
|
369
369
|
/**
|
|
370
370
|
* Extracts 44 speaker features AND the raw F0 contour.
|
|
371
|
-
* The F0 contour is surfaced so
|
|
372
|
-
*
|
|
373
|
-
*
|
|
371
|
+
* The F0 contour is surfaced so server-side analysis can pair it with
|
|
372
|
+
* the motion time-series. Feature vector shape and semantics are
|
|
373
|
+
* unchanged.
|
|
374
374
|
*/
|
|
375
375
|
declare function extractSpeakerFeaturesDetailed(audio: AudioCapture): Promise<{
|
|
376
376
|
features: number[];
|
|
@@ -379,15 +379,15 @@ declare function extractSpeakerFeaturesDetailed(audio: AudioCapture): Promise<{
|
|
|
379
379
|
/**
|
|
380
380
|
* Extracts 44 speaker features. Backward-compatible wrapper that discards
|
|
381
381
|
* the F0 contour; use `extractSpeakerFeaturesDetailed` when the contour is
|
|
382
|
-
* needed (e.g. for
|
|
382
|
+
* needed (e.g. for server-side analysis).
|
|
383
383
|
*/
|
|
384
384
|
declare function extractSpeakerFeatures(audio: AudioCapture): Promise<number[]>;
|
|
385
385
|
|
|
386
386
|
/**
|
|
387
387
|
* Compute per-sample acceleration magnitude |a| = √(ax² + ay² + az²) and
|
|
388
|
-
* linearly resample to a target frame count.
|
|
389
|
-
*
|
|
390
|
-
* the same frame count
|
|
388
|
+
* linearly resample to a target frame count. Surfaced for server-side
|
|
389
|
+
* analysis paired against the F0 contour; the two time-series must share
|
|
390
|
+
* the same frame count when consumed downstream.
|
|
391
391
|
*
|
|
392
392
|
* Returns an empty array if motion data is absent or too short.
|
|
393
393
|
*/
|
|
@@ -513,9 +513,8 @@ declare function submitViaWallet(proof: SolanaProof, commitment: Uint8Array, opt
|
|
|
513
513
|
* and sets a 7-day cooldown before the next reset.
|
|
514
514
|
*
|
|
515
515
|
* Transaction shape: single instruction (no challenge / verify_proof /
|
|
516
|
-
* ZK proof required). Humanness evidence comes from the
|
|
517
|
-
*
|
|
518
|
-
* update).
|
|
516
|
+
* ZK proof required). Humanness evidence comes from the validation
|
|
517
|
+
* pipeline invoked at the /attest step (same as mint and update).
|
|
519
518
|
*/
|
|
520
519
|
declare function submitResetViaWallet(commitment: Uint8Array, options: {
|
|
521
520
|
wallet: any;
|
|
@@ -659,8 +658,8 @@ declare function loadVerificationData(): Promise<StoredVerificationData | null>;
|
|
|
659
658
|
* FALLBACK challenge-phrase generator. Used only when the executor's
|
|
660
659
|
* `/challenge` endpoint is unreachable; the authoritative phrase comes from
|
|
661
660
|
* the server (5 real words drawn from a curated English-word dictionary). On
|
|
662
|
-
* this fallback path, validation skips
|
|
663
|
-
*
|
|
661
|
+
* this fallback path, validation skips the phrase verification step —
|
|
662
|
+
* other server-side checks still run.
|
|
664
663
|
*
|
|
665
664
|
* Output is 5-6 syllable pairs, forming nonsensical but speakable words.
|
|
666
665
|
* Uses crypto.getRandomValues for unpredictable challenge generation.
|
|
@@ -711,16 +710,14 @@ declare function generateLissajousSequence(count?: number): {
|
|
|
711
710
|
*
|
|
712
711
|
* The executor's `/challenge` endpoint returns a fresh nonce + 5-word phrase
|
|
713
712
|
* bound to the wallet for a short TTL (default 60s). The phrase is drawn from
|
|
714
|
-
* a curated English-word dictionary
|
|
715
|
-
* `entros-validation/src/word_dict.rs`); shown to the user as the voice challenge
|
|
713
|
+
* a curated English-word dictionary, shown to the user as the voice challenge
|
|
716
714
|
* and looked up server-side at `/validate-features` to verify the audio
|
|
717
|
-
* matches the issued phrase
|
|
715
|
+
* matches the issued phrase.
|
|
718
716
|
*
|
|
719
|
-
* Server-issued phrases are the only safe design
|
|
720
|
-
*
|
|
721
|
-
*
|
|
722
|
-
*
|
|
723
|
-
* client cannot substitute it.
|
|
717
|
+
* Server-issued phrases are the only safe design here: if the client generated
|
|
718
|
+
* the phrase and sent it to the server alongside the audio, an attacker would
|
|
719
|
+
* submit their own phrase matching whatever content they captured. With server
|
|
720
|
+
* issuance, the phrase is bound to the nonce and the client cannot substitute it.
|
|
724
721
|
*/
|
|
725
722
|
/**
|
|
726
723
|
* Server-issued challenge artifacts. Returned by `fetchChallenge`.
|
|
@@ -745,13 +742,13 @@ declare function fetchChallenge(executorUrl: string, walletAddress: string, apiK
|
|
|
745
742
|
|
|
746
743
|
/**
|
|
747
744
|
* Encode captured Float32 audio samples as base64 int16 PCM for transmission
|
|
748
|
-
* to the validation service
|
|
745
|
+
* to the validation service.
|
|
749
746
|
*
|
|
750
747
|
* Audio is captured as `Float32Array` with values in `[-1.0, 1.0]` by the
|
|
751
|
-
* Pulse SDK (`sensor/audio.ts`). The validation service
|
|
752
|
-
*
|
|
753
|
-
*
|
|
754
|
-
*
|
|
748
|
+
* Pulse SDK (`sensor/audio.ts`). The validation service decodes the base64
|
|
749
|
+
* payload and feeds the audio into server-side transcription. int16 is the
|
|
750
|
+
* standard compact representation: 2 bytes per sample vs 4 for f32, halving
|
|
751
|
+
* wire size without perceptible quality loss for 16kHz speech.
|
|
755
752
|
*
|
|
756
753
|
* Byte layout: little-endian int16 samples, contiguous, no header.
|
|
757
754
|
*/
|
package/dist/index.js
CHANGED
|
@@ -150,9 +150,30 @@ async function captureAudio(options = {}) {
|
|
|
150
150
|
audio: {
|
|
151
151
|
sampleRate: TARGET_SAMPLE_RATE,
|
|
152
152
|
channelCount: 1,
|
|
153
|
+
// Capture without browser-side audio processing — preserves the
|
|
154
|
+
// raw microphone signal for the SDK's downstream feature extraction
|
|
155
|
+
// and for server-side validation. Audio cleanup intended for the
|
|
156
|
+
// transcription path runs server-side, on a parallel path that
|
|
157
|
+
// never feeds back to feature extraction. Matches the mobile SDK's
|
|
158
|
+
// choice of Android's `MIC` source over `VOICE_RECOGNITION` —
|
|
159
|
+
// same architectural decision, two platforms.
|
|
153
160
|
echoCancellation: false,
|
|
154
161
|
noiseSuppression: false,
|
|
155
|
-
autoGainControl: false
|
|
162
|
+
autoGainControl: false,
|
|
163
|
+
// OS-level voice isolation request (W3C Media Capture Extensions,
|
|
164
|
+
// 2024). Activates the platform DSP on Chrome 124+ / ChromeOS and
|
|
165
|
+
// surfaces Apple Voice Isolation Mic Mode on Safari macOS Sonoma+
|
|
166
|
+
// / iOS 17+ when the user has it enabled in Control Center.
|
|
167
|
+
// Silently ignored on browsers/OSes without support, so the
|
|
168
|
+
// constraint costs nothing where it doesn't help. Distinct
|
|
169
|
+
// mechanism from `noiseSuppression` above — that flag controls
|
|
170
|
+
// WebRTC's hand-tuned AudioProcessingModule, this requests the
|
|
171
|
+
// OS-native neural effect.
|
|
172
|
+
// @ts-expect-error -- W3C Media Capture Extensions property; not
|
|
173
|
+
// yet in lib.dom.d.ts as of TypeScript 6.0. This directive itself
|
|
174
|
+
// becomes a compile error once lib.dom catches up, signaling that
|
|
175
|
+
// it can be deleted.
|
|
176
|
+
voiceIsolation: true
|
|
156
177
|
}
|
|
157
178
|
});
|
|
158
179
|
let ctx;
|
|
@@ -652,6 +673,7 @@ async function getMeyda() {
|
|
|
652
673
|
}
|
|
653
674
|
return meydaModule.default ?? meydaModule;
|
|
654
675
|
}
|
|
676
|
+
var F0_YIELD_EVERY_N_FRAMES = 16;
|
|
655
677
|
async function detectF0Contour(samples, sampleRate) {
|
|
656
678
|
const detect = await getPitchDetector(sampleRate);
|
|
657
679
|
const frameSize = getFrameSize(sampleRate);
|
|
@@ -678,6 +700,9 @@ async function detectF0Contour(samples, sampleRate) {
|
|
|
678
700
|
sum += (frame[j] ?? 0) * (frame[j] ?? 0);
|
|
679
701
|
}
|
|
680
702
|
amplitudes.push(Math.sqrt(sum / frame.length));
|
|
703
|
+
if (i > 0 && i < numFrames - 1 && i % F0_YIELD_EVERY_N_FRAMES === 0) {
|
|
704
|
+
await yieldToMainThread();
|
|
705
|
+
}
|
|
681
706
|
}
|
|
682
707
|
return { f0, amplitudes, periods };
|
|
683
708
|
}
|
|
@@ -4744,9 +4769,16 @@ var PulseSession = class {
|
|
|
4744
4769
|
audio: {
|
|
4745
4770
|
sampleRate: 16e3,
|
|
4746
4771
|
channelCount: 1,
|
|
4772
|
+
// Capture constraints kept in lock-step with `sensor/audio.ts` —
|
|
4773
|
+
// the two entry points (standalone capture vs session-based
|
|
4774
|
+
// capture) must agree or the verify flow and direct-API
|
|
4775
|
+
// consumers diverge.
|
|
4747
4776
|
echoCancellation: false,
|
|
4748
4777
|
noiseSuppression: false,
|
|
4749
|
-
autoGainControl: false
|
|
4778
|
+
autoGainControl: false,
|
|
4779
|
+
// @ts-expect-error -- W3C Media Capture Extensions property; not
|
|
4780
|
+
// yet in lib.dom.d.ts as of TypeScript 6.0.
|
|
4781
|
+
voiceIsolation: true
|
|
4750
4782
|
}
|
|
4751
4783
|
});
|
|
4752
4784
|
this.audioStageState = "capturing";
|