@fugood/bricks-project 2.21.0-beta.21 → 2.21.0-beta.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -441,11 +441,18 @@ export const templateActionNameMap = {
441
441
  },
442
442
  GENERATOR_SPEECH_INFERENCE: {
443
443
  GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_FILE: {
444
+ fileUrl: 'GENERATOR_SPEECH_INFERENCE_FILE_URL',
445
+ prompt: 'GENERATOR_SPEECH_INFERENCE_PROMPT',
446
+ beamSize: 'GENERATOR_SPEECH_INFERENCE_BEAM_SIZE',
447
+ language: 'GENERATOR_SPEECH_INFERENCE_LANGUAGE',
448
+ translate: 'GENERATOR_SPEECH_INFERENCE_TRANSLATE',
449
+ },
450
+ GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_DATA: {
451
+ data: 'GENERATOR_SPEECH_INFERENCE_DATA',
444
452
  prompt: 'GENERATOR_SPEECH_INFERENCE_PROMPT',
445
453
  beamSize: 'GENERATOR_SPEECH_INFERENCE_BEAM_SIZE',
446
454
  language: 'GENERATOR_SPEECH_INFERENCE_LANGUAGE',
447
455
  translate: 'GENERATOR_SPEECH_INFERENCE_TRANSLATE',
448
- fileUrl: 'GENERATOR_SPEECH_INFERENCE_FILE_URL',
449
456
  },
450
457
  GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_REALTIME: {
451
458
  prompt: 'GENERATOR_SPEECH_INFERENCE_PROMPT',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fugood/bricks-project",
3
- "version": "2.21.0-beta.21",
3
+ "version": "2.21.0-beta.22",
4
4
  "main": "index.ts",
5
5
  "scripts": {
6
6
  "build": "node scripts/build.js"
@@ -13,5 +13,5 @@
13
13
  "lodash": "^4.17.4",
14
14
  "uuid": "^8.3.1"
15
15
  },
16
- "gitHead": "129ece680c77c677a3b643a1102d056533e5792c"
16
+ "gitHead": "3e8cf8167c1692f5e981c122f144ef4680cac28d"
17
17
  }
package/types/data.ts CHANGED
@@ -72,7 +72,7 @@ export type DataAssetKind = {
72
72
  | 'binary-asset'
73
73
  preload?: {
74
74
  type: 'url'
75
- hashType: 'md5' | 'sha256'
75
+ hashType: 'md5' | 'sha256' | 'sha1'
76
76
  hash: string
77
77
  }
78
78
  metadata?: { [key: string]: any }
@@ -438,7 +438,7 @@ Default property:
438
438
  /* Hash value */
439
439
  hash?: string | DataLink
440
440
  /* Hash type */
441
- hashType?: 'md5' | 'sha256' | DataLink
441
+ hashType?: 'md5' | 'sha256' | 'sha1' | DataLink
442
442
  /* Request method */
443
443
  method?: 'GET' | 'POST' | 'PUT' | 'DELETE' | 'PATCH' | DataLink
444
444
  /* Header */
@@ -455,7 +455,7 @@ Default property:
455
455
  encoding?: 'utf8' | 'base64' | DataLink
456
456
  url?: string | DataLink
457
457
  hash?: string | DataLink
458
- hashType?: 'sha256' | 'md5' | DataLink
458
+ hashType?: 'sha256' | 'sha1' | 'md5' | DataLink
459
459
  method?: 'GET' | 'POST' | 'PUT' | 'DELETE' | 'PATCH' | DataLink
460
460
  header?: {} | DataLink
461
461
  fields?: {} | DataLink
@@ -4260,6 +4260,7 @@ interface GeneratorQuestionDef {
4260
4260
  /*
4261
4261
  Default property:
4262
4262
  {
4263
+ "modalMode": "root",
4263
4264
  "showClose": true,
4264
4265
  "showBack": true,
4265
4266
  "cameraType": "back",
@@ -4271,6 +4272,8 @@ Default property:
4271
4272
  }
4272
4273
  */
4273
4274
  property?: {
4275
+ /* Modal mode */
4276
+ modalMode?: 'root' | 'in-subspace' | DataLink
4274
4277
  /* Inquirer schema
4275
4278
  `key`: Field key (unique, required)
4276
4279
  `kind`: Field type (required)
@@ -5711,6 +5714,11 @@ export type GeneratorSpeechInferenceActionLoadModel = Action & {
5711
5714
  export type GeneratorSpeechInferenceActionTranscribeFile = ActionWithParams & {
5712
5715
  __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_FILE'
5713
5716
  params?: Array<
5717
+ | {
5718
+ input: 'fileUrl'
5719
+ value?: string | DataLink
5720
+ mapping?: string
5721
+ }
5714
5722
  | {
5715
5723
  input: 'prompt'
5716
5724
  value?: string | DataLink
@@ -5731,11 +5739,38 @@ export type GeneratorSpeechInferenceActionTranscribeFile = ActionWithParams & {
5731
5739
  value?: boolean | DataLink
5732
5740
  mapping?: string
5733
5741
  }
5742
+ >
5743
+ }
5744
+
5745
+ /* Transcribe audio data. Currently only supports base64-encoded audio data (16-bit PCM, mono, 16kHz) */
5746
+ export type GeneratorSpeechInferenceActionTranscribeData = ActionWithParams & {
5747
+ __actionName: 'GENERATOR_SPEECH_INFERENCE_TRANSCRIBE_DATA'
5748
+ params?: Array<
5734
5749
  | {
5735
- input: 'fileUrl'
5750
+ input: 'data'
5751
+ value?: any
5752
+ mapping?: string
5753
+ }
5754
+ | {
5755
+ input: 'prompt'
5756
+ value?: string | DataLink
5757
+ mapping?: string
5758
+ }
5759
+ | {
5760
+ input: 'beamSize'
5761
+ value?: number | DataLink
5762
+ mapping?: string
5763
+ }
5764
+ | {
5765
+ input: 'language'
5736
5766
  value?: string | DataLink
5737
5767
  mapping?: string
5738
5768
  }
5769
+ | {
5770
+ input: 'translate'
5771
+ value?: boolean | DataLink
5772
+ mapping?: string
5773
+ }
5739
5774
  >
5740
5775
  }
5741
5776
 
@@ -5826,10 +5861,10 @@ interface GeneratorSpeechInferenceDef {
5826
5861
  Default property:
5827
5862
  {
5828
5863
  "init": false,
5829
- "modelType": "Base",
5830
- "modelQuantizedType": "none",
5864
+ "modelName": "base-q8_0",
5831
5865
  "modelUseCoreML": false,
5832
5866
  "modelUseGPU": true,
5867
+ "modelUseFlashAttn": false,
5833
5868
  "inferLanguage": "Auto",
5834
5869
  "inferRealtimeAudioSeconds": 30,
5835
5870
  "inferRealtimeAudioSliceSeconds": 30,
@@ -5844,35 +5879,71 @@ Default property:
5844
5879
  /* Initialize the Whisper context on generator initialization
5845
5880
  Please note that it will take some RAM depending on the model size */
5846
5881
  init?: boolean | DataLink
5847
- /* Use model type, the model download progress will be done in preload stage or the generator initialization stage.
5848
- We used `ggml` format model, please refer to https://github.com/ggerganov/whisper.cpp/tree/master/models
5849
- You can also choose `Custom` option and set `Model URL` and `Model MD5` to use your own model */
5850
- modelType?:
5851
- | 'Custom'
5852
- | 'Tiny'
5853
- | 'Base'
5854
- | 'Small'
5855
- | 'Medium'
5856
- | 'Large'
5857
- | 'Distil Large v2'
5858
- | 'Tiny (English only)'
5859
- | 'Base (English only)'
5860
- | 'Small (English only)'
5861
- | 'Medium (English only)'
5862
- | 'Distil Small (English only)'
5863
- | 'Distil Medium (English only)'
5882
+ /* Model name to use. The model is downloaded during the preload stage or the generator initialization stage.
5883
+ We used `ggml` format model, please refer to https://huggingface.co/BricksDisplay/whisper-ggml
5884
+ You can also choose the `custom` option and set `Model URL` and `Model SHA1` to use your own model */
5885
+ modelName?:
5886
+ | 'custom'
5887
+ | 'tiny'
5888
+ | 'tiny-q5_1'
5889
+ | 'tiny-q8_0'
5890
+ | 'tiny.en'
5891
+ | 'tiny.en-q5_1'
5892
+ | 'tiny.en-q8_0'
5893
+ | 'base'
5894
+ | 'base-q5_1'
5895
+ | 'base-q8_0'
5896
+ | 'base.en'
5897
+ | 'base.en-q5_1'
5898
+ | 'base.en-q8_0'
5899
+ | 'small'
5900
+ | 'small-q5_0'
5901
+ | 'small-q5_1'
5902
+ | 'small-q8_0'
5903
+ | 'small.en'
5904
+ | 'small.en-q5_1'
5905
+ | 'small.en-q8_0'
5906
+ | 'medium'
5907
+ | 'medium-q5_0'
5908
+ | 'medium-q5_1'
5909
+ | 'medium-q8_0'
5910
+ | 'medium.en'
5911
+ | 'medium.en-q5_1'
5912
+ | 'medium.en-q8_0'
5913
+ | 'large-v1'
5914
+ | 'large-v2'
5915
+ | 'large-v2-q5_0'
5916
+ | 'large-v2-q8_0'
5917
+ | 'large-v3'
5918
+ | 'large-v3-q5_0'
5919
+ | 'large-v3-q8_0'
5920
+ | 'large-v3-turbo'
5921
+ | 'large-v3-turbo-q5_0'
5922
+ | 'large-v3-turbo-q8_0'
5923
+ | 'small.en-tdrz'
5924
+ | 'small.en-tdrz-q5_0'
5925
+ | 'small.en-tdrz-q8_0'
5926
+ | 'distil-small.en'
5927
+ | 'distil-small.en-q5_0'
5928
+ | 'distil-small.en-q8_0'
5929
+ | 'distil-medium.en'
5930
+ | 'distil-medium.en-q5_0'
5931
+ | 'distil-medium.en-q8_0'
5932
+ | 'distil-large-v3'
5933
+ | 'distil-large-v3-q5_0'
5934
+ | 'distil-large-v3-q8_0'
5864
5935
  | DataLink
5865
- /* Use quantized model */
5866
- modelQuantizedType?: 'none' | 'q4_0' | 'q4_1' | 'q5_0' | 'q5_1' | 'q8_0' | DataLink
5867
5936
  /* The URL or path of model
5868
5937
  We used `ggml` format model, please refer to https://github.com/ggerganov/whisper.cpp/tree/master/models */
5869
5938
  modelUrl?: string | DataLink
5870
- /* MD5 of model JSON */
5871
- modelMd5?: string | DataLink
5939
+ /* SHA1 of model */
5940
+ modelSha1?: string | DataLink
5872
5941
  /* [Unstable] iOS: Use CoreML model for inference */
5873
5942
  modelUseCoreML?: boolean | DataLink
5874
5943
  /* Use GPU acceleration for inference. Currently iOS only; if enabled, the Core ML option is ignored. */
5875
5944
  modelUseGPU?: boolean | DataLink
5945
+ /* Use Flash Attention for inference (Recommended with GPU enabled) */
5946
+ modelUseFlashAttn?: boolean | DataLink
5876
5947
  /* The language of the file to be inferred */
5877
5948
  inferLanguage?:
5878
5949
  | 'Auto'
@@ -5986,6 +6057,8 @@ Default property:
5986
6057
  inferMaxThreads?: number | DataLink
5987
6058
  /* Output token-level timestamps in details outlet */
5988
6059
  inferTokenTimestamps?: boolean | DataLink
6060
+ /* Speaker diarization (Please use small.en-tdrz model) */
6061
+ inferTdrz?: boolean | DataLink
5989
6062
  /* Maximum segment length in characters */
5990
6063
  inferMaxLength?: number | DataLink
5991
6064
  /* Audio time offset in milliseconds */
@@ -6076,7 +6149,7 @@ Default property:
6076
6149
  /* Is transcribing */
6077
6150
  isTranscribing?: () => Data
6078
6151
  /* Progress of transcribe audio (0-100) */
6079
- transcribeFileProgress?: () => Data
6152
+ transcribeProgress?: () => Data
6080
6153
  /* Inference result */
6081
6154
  transcribeResult?: () => Data
6082
6155
  /* Inference result details */
@@ -6104,7 +6177,7 @@ export type GeneratorSpeechInference = Generator &
6104
6177
  | 'contextState'
6105
6178
  | 'contextDetails'
6106
6179
  | 'isTranscribing'
6107
- | 'transcribeFileProgress'
6180
+ | 'transcribeProgress'
6108
6181
  | 'transcribeResult'
6109
6182
  | 'transcribeDetails'
6110
6183
  | 'recordedPath'
@@ -6451,12 +6524,14 @@ Default property:
6451
6524
  completionPromptTemplateData?: {} | DataLink
6452
6525
  /* The prompt template type */
6453
6526
  completionPromptTemplateType?: '${}' | '{{}}' | DataLink
6454
- /* Grammar */
6455
- completionGrammar?: string | DataLink
6456
6527
  /* Stop words */
6457
6528
  completionStopWords?: Array<string | DataLink> | DataLink
6458
6529
  /* Number of tokens to predict */
6459
6530
  completionPredict?: number | DataLink
6531
+ /* Throttle time for completion result (in milliseconds) */
6532
+ completionResultThrottle?: number | DataLink
6533
+ /* Grammar (GBNF: Please refer to https://github.com/ggerganov/llama.cpp/tree/master/grammars) */
6534
+ completionGrammar?: string | DataLink
6460
6535
  /* Temperature */
6461
6536
  completionTemperature?: number | DataLink
6462
6537
  /* Number of probabilities to show for each token in the completion details */
@@ -6493,8 +6568,6 @@ Default property:
6493
6568
  completionTypicalP?: number | DataLink
6494
6569
  /* Repeat alpha frequency penalty (default: 0.1, 0.0 = disabled) */
6495
6570
  completionIgnoreEOS?: boolean | DataLink
6496
- /* Throttle time for completion result (in milliseconds) */
6497
- completionResultThrottle?: number | DataLink
6498
6571
  }
6499
6572
  events?: {
6500
6573
  /* Event triggered when load is done */