react-native-sherpa-onnx 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/LICENSE +1 -0
  2. package/README.md +90 -21
  3. package/SherpaOnnx.podspec +3 -0
  4. package/THIRD_PARTY_LICENSES/README.md +62 -0
  5. package/THIRD_PARTY_LICENSES/ffmpeg.txt +502 -0
  6. package/THIRD_PARTY_LICENSES/libarchive.txt +65 -0
  7. package/THIRD_PARTY_LICENSES/nvidia_omla.txt +181 -0
  8. package/THIRD_PARTY_LICENSES/onnxruntime.txt +21 -0
  9. package/THIRD_PARTY_LICENSES/opus.txt +44 -0
  10. package/THIRD_PARTY_LICENSES/sherpa-onnx.txt +201 -0
  11. package/THIRD_PARTY_LICENSES/shine.txt +482 -0
  12. package/THIRD_PARTY_LICENSES/zstd.txt +30 -0
  13. package/android/build.gradle +7 -3
  14. package/android/prebuilt-download.gradle +345 -153
  15. package/android/prebuilt-versions.gradle +2 -2
  16. package/android/src/main/assets/model_licenses/asr-models-license-status.csv +409 -0
  17. package/android/src/main/assets/model_licenses/qnn-asr-models-license-status.csv +695 -0
  18. package/android/src/main/assets/model_licenses/tts-models-license-status.csv +596 -0
  19. package/android/src/main/cpp/CMakeLists.txt +28 -10
  20. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +306 -6
  21. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +33 -4
  22. package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +266 -7
  23. package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +268 -2
  24. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +6 -2
  25. package/android/src/main/cpp/jni/model_detect/sherpa-onnx-validate-tts.cpp +4 -2
  26. package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +137 -7
  27. package/android/src/main/java/com/sherpaonnx/SherpaOnnxAssetHelper.kt +51 -6
  28. package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +159 -0
  29. package/android/src/main/java/com/sherpaonnx/SherpaOnnxOnlineSttHelper.kt +4 -1
  30. package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +112 -97
  31. package/ios/Resources/model_licenses/asr-models-license-status.csv +409 -0
  32. package/ios/Resources/model_licenses/qnn-asr-models-license-status.csv +695 -0
  33. package/ios/Resources/model_licenses/tts-models-license-status.csv +596 -0
  34. package/ios/SherpaOnnx+OnlineSTT.mm +2 -0
  35. package/ios/SherpaOnnx+PcmLiveStream.mm +2 -29
  36. package/ios/SherpaOnnx+TTS.mm +178 -20
  37. package/ios/SherpaOnnx.mm +108 -1
  38. package/ios/SherpaOnnxAudioConvert.h +10 -0
  39. package/ios/SherpaOnnxAudioConvert.mm +257 -1
  40. package/ios/archive/sherpa-onnx-archive-helper.h +10 -0
  41. package/ios/archive/sherpa-onnx-archive-helper.mm +56 -5
  42. package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +13 -2
  43. package/ios/model_detect/sherpa-onnx-validate-tts.mm +4 -2
  44. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.h +1 -0
  45. package/ios/online_stt/sherpa-onnx-online-stt-wrapper.mm +4 -0
  46. package/ios/tts/sherpa-onnx-tts-wrapper.h +37 -0
  47. package/ios/tts/sherpa-onnx-tts-wrapper.mm +149 -3
  48. package/lib/module/NativeSherpaOnnx.js.map +1 -1
  49. package/lib/module/audio/index.js +8 -0
  50. package/lib/module/audio/index.js.map +1 -1
  51. package/lib/module/download/ModelDownloadManager.js +10 -929
  52. package/lib/module/download/ModelDownloadManager.js.map +1 -1
  53. package/lib/module/download/activeModelOperations.js +26 -0
  54. package/lib/module/download/activeModelOperations.js.map +1 -0
  55. package/lib/module/download/background-downloader.d.js +2 -0
  56. package/lib/module/download/background-downloader.d.js.map +1 -0
  57. package/lib/module/download/bulkPurge.js +72 -0
  58. package/lib/module/download/bulkPurge.js.map +1 -0
  59. package/lib/module/download/checksumPrompt.js +19 -0
  60. package/lib/module/download/checksumPrompt.js.map +1 -0
  61. package/lib/module/download/constants.js +7 -0
  62. package/lib/module/download/constants.js.map +1 -0
  63. package/lib/module/download/downloadEvents.js +35 -0
  64. package/lib/module/download/downloadEvents.js.map +1 -0
  65. package/lib/module/download/downloadTask.js +385 -0
  66. package/lib/module/download/downloadTask.js.map +1 -0
  67. package/lib/module/download/ensureModel.js +89 -0
  68. package/lib/module/download/ensureModel.js.map +1 -0
  69. package/lib/module/download/index.js +4 -3
  70. package/lib/module/download/index.js.map +1 -1
  71. package/lib/module/download/localModels.js +151 -0
  72. package/lib/module/download/localModels.js.map +1 -0
  73. package/lib/module/download/modelExtraction.js +174 -0
  74. package/lib/module/download/modelExtraction.js.map +1 -0
  75. package/lib/module/download/paths.js +98 -0
  76. package/lib/module/download/paths.js.map +1 -0
  77. package/lib/module/download/postDownloadProcessing.js +206 -0
  78. package/lib/module/download/postDownloadProcessing.js.map +1 -0
  79. package/lib/module/download/protectedModelKeys.js +31 -0
  80. package/lib/module/download/protectedModelKeys.js.map +1 -0
  81. package/lib/module/download/registry.js +267 -0
  82. package/lib/module/download/registry.js.map +1 -0
  83. package/lib/module/download/retry.js +59 -0
  84. package/lib/module/download/retry.js.map +1 -0
  85. package/lib/module/download/types.js +17 -0
  86. package/lib/module/download/types.js.map +1 -0
  87. package/lib/module/download/validation.js +101 -5
  88. package/lib/module/download/validation.js.map +1 -1
  89. package/lib/module/{download → extraction}/extractTarBz2.js +3 -1
  90. package/lib/module/extraction/extractTarBz2.js.map +1 -0
  91. package/lib/module/extraction/extractTarZst.js +54 -0
  92. package/lib/module/extraction/extractTarZst.js.map +1 -0
  93. package/lib/module/extraction/index.js +190 -0
  94. package/lib/module/extraction/index.js.map +1 -0
  95. package/lib/module/extraction/types.js +2 -0
  96. package/lib/module/extraction/types.js.map +1 -0
  97. package/lib/module/index.js +2 -1
  98. package/lib/module/index.js.map +1 -1
  99. package/lib/module/licenses.js +63 -0
  100. package/lib/module/licenses.js.map +1 -0
  101. package/lib/module/stt/index.js +16 -2
  102. package/lib/module/stt/index.js.map +1 -1
  103. package/lib/module/stt/streaming.js +2 -0
  104. package/lib/module/stt/streaming.js.map +1 -1
  105. package/lib/module/stt/streamingTypes.js.map +1 -1
  106. package/lib/module/stt/types.js.map +1 -1
  107. package/lib/module/tts/index.js +20 -2
  108. package/lib/module/tts/index.js.map +1 -1
  109. package/lib/module/tts/streaming.js +4 -0
  110. package/lib/module/tts/streaming.js.map +1 -1
  111. package/lib/module/tts/types.js.map +1 -1
  112. package/lib/module/utils.js +16 -1
  113. package/lib/module/utils.js.map +1 -1
  114. package/lib/typescript/src/NativeSherpaOnnx.d.ts +72 -5
  115. package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
  116. package/lib/typescript/src/audio/index.d.ts +10 -0
  117. package/lib/typescript/src/audio/index.d.ts.map +1 -1
  118. package/lib/typescript/src/download/ModelDownloadManager.d.ts +10 -108
  119. package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -1
  120. package/lib/typescript/src/download/activeModelOperations.d.ts +6 -0
  121. package/lib/typescript/src/download/activeModelOperations.d.ts.map +1 -0
  122. package/lib/typescript/src/download/bulkPurge.d.ts +14 -0
  123. package/lib/typescript/src/download/bulkPurge.d.ts.map +1 -0
  124. package/lib/typescript/src/download/checksumPrompt.d.ts +3 -0
  125. package/lib/typescript/src/download/checksumPrompt.d.ts.map +1 -0
  126. package/lib/typescript/src/download/constants.d.ts +5 -0
  127. package/lib/typescript/src/download/constants.d.ts.map +1 -0
  128. package/lib/typescript/src/download/downloadEvents.d.ts +6 -0
  129. package/lib/typescript/src/download/downloadEvents.d.ts.map +1 -0
  130. package/lib/typescript/src/download/downloadTask.d.ts +20 -0
  131. package/lib/typescript/src/download/downloadTask.d.ts.map +1 -0
  132. package/lib/typescript/src/download/ensureModel.d.ts +26 -0
  133. package/lib/typescript/src/download/ensureModel.d.ts.map +1 -0
  134. package/lib/typescript/src/download/index.d.ts +7 -5
  135. package/lib/typescript/src/download/index.d.ts.map +1 -1
  136. package/lib/typescript/src/download/localModels.d.ts +15 -0
  137. package/lib/typescript/src/download/localModels.d.ts.map +1 -0
  138. package/lib/typescript/src/download/modelExtraction.d.ts +36 -0
  139. package/lib/typescript/src/download/modelExtraction.d.ts.map +1 -0
  140. package/lib/typescript/src/download/paths.d.ts +28 -0
  141. package/lib/typescript/src/download/paths.d.ts.map +1 -0
  142. package/lib/typescript/src/download/postDownloadProcessing.d.ts +19 -0
  143. package/lib/typescript/src/download/postDownloadProcessing.d.ts.map +1 -0
  144. package/lib/typescript/src/download/protectedModelKeys.d.ts +6 -0
  145. package/lib/typescript/src/download/protectedModelKeys.d.ts.map +1 -0
  146. package/lib/typescript/src/download/registry.d.ts +14 -0
  147. package/lib/typescript/src/download/registry.d.ts.map +1 -0
  148. package/lib/typescript/src/download/retry.d.ts +15 -0
  149. package/lib/typescript/src/download/retry.d.ts.map +1 -0
  150. package/lib/typescript/src/download/types.d.ts +96 -0
  151. package/lib/typescript/src/download/types.d.ts.map +1 -0
  152. package/lib/typescript/src/download/validation.d.ts +19 -0
  153. package/lib/typescript/src/download/validation.d.ts.map +1 -1
  154. package/lib/typescript/src/extraction/extractTarBz2.d.ts.map +1 -0
  155. package/lib/typescript/src/extraction/extractTarZst.d.ts +14 -0
  156. package/lib/typescript/src/extraction/extractTarZst.d.ts.map +1 -0
  157. package/lib/typescript/src/extraction/index.d.ts +50 -0
  158. package/lib/typescript/src/extraction/index.d.ts.map +1 -0
  159. package/lib/typescript/src/extraction/types.d.ts +60 -0
  160. package/lib/typescript/src/extraction/types.d.ts.map +1 -0
  161. package/lib/typescript/src/index.d.ts +1 -0
  162. package/lib/typescript/src/index.d.ts.map +1 -1
  163. package/lib/typescript/src/licenses.d.ts +10 -0
  164. package/lib/typescript/src/licenses.d.ts.map +1 -0
  165. package/lib/typescript/src/stt/index.d.ts +4 -1
  166. package/lib/typescript/src/stt/index.d.ts.map +1 -1
  167. package/lib/typescript/src/stt/streaming.d.ts.map +1 -1
  168. package/lib/typescript/src/stt/streamingTypes.d.ts +5 -0
  169. package/lib/typescript/src/stt/streamingTypes.d.ts.map +1 -1
  170. package/lib/typescript/src/stt/types.d.ts +3 -1
  171. package/lib/typescript/src/stt/types.d.ts.map +1 -1
  172. package/lib/typescript/src/tts/index.d.ts +3 -1
  173. package/lib/typescript/src/tts/index.d.ts.map +1 -1
  174. package/lib/typescript/src/tts/streaming.d.ts.map +1 -1
  175. package/lib/typescript/src/tts/types.d.ts +6 -5
  176. package/lib/typescript/src/tts/types.d.ts.map +1 -1
  177. package/lib/typescript/src/utils.d.ts +5 -0
  178. package/lib/typescript/src/utils.d.ts.map +1 -1
  179. package/package.json +11 -1
  180. package/scripts/{check-model-csvs.sh → ci/check-model-csvs.sh} +9 -2
  181. package/scripts/ci/collect_all_sherpa_model_streams.sh +101 -0
  182. package/scripts/ci/collect_one_sherpa_release_stream.sh +189 -0
  183. package/scripts/ci/sherpa_asr_model_release_streams.json +21 -0
  184. package/scripts/ci/sherpa_tts_model_release_streams.json +13 -0
  185. package/scripts/ci/update_model_license_csv.sh +765 -0
  186. package/scripts/setup-ios-framework.sh +14 -11
  187. package/scripts/update_commercial_use.js +73 -0
  188. package/src/NativeSherpaOnnx.ts +92 -5
  189. package/src/audio/index.ts +20 -0
  190. package/src/download/ModelDownloadManager.ts +55 -1343
  191. package/src/download/activeModelOperations.ts +38 -0
  192. package/src/download/background-downloader.d.ts +43 -0
  193. package/src/download/bulkPurge.ts +102 -0
  194. package/src/download/checksumPrompt.ts +25 -0
  195. package/src/download/constants.ts +5 -0
  196. package/src/download/downloadEvents.ts +55 -0
  197. package/src/download/downloadTask.ts +497 -0
  198. package/src/download/ensureModel.ts +124 -0
  199. package/src/download/index.ts +19 -2
  200. package/src/download/localModels.ts +234 -0
  201. package/src/download/modelExtraction.ts +244 -0
  202. package/src/download/paths.ts +134 -0
  203. package/src/download/postDownloadProcessing.ts +292 -0
  204. package/src/download/protectedModelKeys.ts +30 -0
  205. package/src/download/registry.ts +404 -0
  206. package/src/download/retry.ts +76 -0
  207. package/src/download/types.ts +120 -0
  208. package/src/download/validation.ts +114 -8
  209. package/src/{download → extraction}/extractTarBz2.ts +3 -1
  210. package/src/extraction/extractTarZst.ts +79 -0
  211. package/src/extraction/index.ts +269 -0
  212. package/src/extraction/types.ts +63 -0
  213. package/src/index.tsx +2 -0
  214. package/src/licenses.ts +100 -0
  215. package/src/stt/index.ts +20 -2
  216. package/src/stt/streaming.ts +3 -0
  217. package/src/stt/streamingTypes.ts +5 -0
  218. package/src/stt/types.ts +3 -1
  219. package/src/tts/index.ts +30 -2
  220. package/src/tts/streaming.ts +10 -0
  221. package/src/tts/types.ts +6 -5
  222. package/src/utils.ts +22 -1
  223. package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -1
  224. package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -1
  225. package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -1
  226. package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -1
  227. package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +0 -301
  228. package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +0 -187
  229. package/lib/module/download/extractTarBz2.js.map +0 -1
  230. package/lib/typescript/src/download/extractTarBz2.d.ts.map +0 -1
  231. package/scripts/check-qnn-support.sh +0 -78
  232. /package/lib/typescript/src/{download → extraction}/extractTarBz2.d.ts +0 -0
@@ -0,0 +1,765 @@
1
+ #!/usr/bin/env bash
2
+ # Update model-license CSV from release asset list and pre-collected tree-cache.
3
+ #
4
+ # Goal: map each release asset (same names as *-models-expected.csv) to license_type and
5
+ # commercial_use hints for app distribution (ads, IAP). Not legal advice.
6
+ #
7
+ # Behavior:
8
+ # - Reads existing CSV if present; preserves rows and manual edits.
9
+ # - Merges in all assets from asset-list.txt (release); adds new rows with empty license_type.
10
+ # - Skips any asset with detection_source `manual` (hand-maintained row; never overwritten).
11
+ # - Skips any asset whose license_type and commercial_use are both set and neither is `unknown`
12
+ # (case-insensitive). Rows with empty or `unknown` license_type and/or empty or `unknown`
13
+ # commercial_use are (re)processed. license_type `exhausted` is never auto-overwritten.
14
+ # - Uses tree-cache (from asr/tts-models-structure.txt + new downloads) to see if a LICENSE-like
15
+ # path exists — no full extract unless we need file contents for detection.
16
+ # - Downloads the .tar.bz2 only when a license-like path was found and license_type is still empty.
17
+ # - Pipeline: try archive (if applicable) → HF/ModelScope fallbacks for eligible assets. If no license
18
+ # is found after all attempts, license_type is set to exhausted (default keyword; override with
19
+ # LICENSE_EXHAUSTED env). You can set exhausted manually after review.
20
+ # - .onnx-only: exhausted (no archive to scan).
21
+ # - HF fallback (vits-piper-*.tar.bz2, sherpa-onnx-*.tar.bz2): repo slug = asset basename without .tar.bz2
22
+ # under HF_MODEL_OWNER (default csukuangfj). Try MODEL_CARD (* License: …) then README.md YAML
23
+ # (---\nlicense: …). First successful source wins (HF before ModelScope). Only if HF has no license but
24
+ # README.md links to modelscope.cn/models/…, fetch that /summary HTML and read License from
25
+ # window.__detail_data__ JSON (\"License\":\"…\").
26
+ # license_file = HF repo URL or ModelScope summary URL; detection_source = huggingface_model_card or
27
+ # modelscope_detail_json. Release tarball names must match HF repo names or fetch 404s.
28
+ # - QNN binary stream (see --stream-id asr-models-qnn-binary, or QNN in asset name, or qnn-*-license-status.csv):
29
+ # after archive scan + HF fallback still yield nothing, last resort looks up a matching row in
30
+ # asr-models-license-status.csv (default: same directory as --csv). Strip prefix
31
+ # sherpa-onnx-qnn-<soc>-binary-<n>-seconds- from the QNN asset name, then try a few derived filenames
32
+ # (exact, sherpa-onnx-…, and sherpa-onnx-<stem>.tar.bz2 when …-int8.tar.bz2). On match, copy the ASR row’s
33
+ # license fields (not asset_name) onto the QNN asset; on no match → exhausted like other dead ends.
34
+ # - Hugging Face: set HF_TOKEN or HUGGINGFACE_HUB_TOKEN (read token is enough for public repos). Anonymous
35
+ # requests from CI often get HTTP 401; without a token README/MODEL_CARD cannot be fetched.
36
+ #
37
+ # Note: With `set -u`, ${#empty_assoc[@]} and ${!empty_assoc[@]} can error on some Bash builds;
38
+ # we avoid that below.
39
+
40
set -euo pipefail

if (( BASH_VERSINFO[0] < 4 )); then
  echo "This script requires Bash version 4+ (for associative arrays)." >&2
  exit 1
fi

# Command-line settings; the first three are mandatory (checked below).
ASSET_LIST=""
TREE_CACHE_DIR=""
CSV_FILE=""
STREAM_ID=""
ASR_LICENSE_CSV=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --asset-list | --tree-cache-dir | --csv | --stream-id | --asr-license-csv)
      # Every option takes exactly one value. Check explicitly so a trailing
      # option fails with a readable message instead of "$2: unbound variable".
      if [[ $# -lt 2 ]]; then
        echo "Missing value for parameter $1" >&2
        exit 1
      fi
      case "$1" in
        --asset-list) ASSET_LIST="$2" ;;
        --tree-cache-dir) TREE_CACHE_DIR="$2" ;;
        --csv) CSV_FILE="$2" ;;
        --stream-id) STREAM_ID="$2" ;;
        --asr-license-csv) ASR_LICENSE_CSV="$2" ;;
      esac
      shift 2
      ;;
    *)
      echo "Unknown parameter $1" >&2
      exit 1
      ;;
  esac
done

if [[ -z "$ASSET_LIST" || -z "$TREE_CACHE_DIR" || -z "$CSV_FILE" ]]; then
  echo "Usage: $0 --asset-list <path> --tree-cache-dir <dir> --csv <path> [--stream-id <id>] [--asr-license-csv <path>]" >&2
  exit 1
fi

# Default ASR lookup CSV: asr-models-license-status.csv next to --csv.
if [[ -z "$ASR_LICENSE_CSV" ]]; then
  ASR_LICENSE_CSV="$(cd "$(dirname "$CSV_FILE")" && pwd)/asr-models-license-status.csv"
fi

# Authenticated GitHub downloads (CI: GITHUB_TOKEN; local: GITHUB_TOKEN or GH_TOKEN).
_GH_TOKEN="${GITHUB_TOKEN:-${GH_TOKEN:-}}"
# Hugging Face raw file fetches (CI: often required to avoid 401 on huggingface.co).
_HF_TOKEN="${HF_TOKEN:-${HUGGINGFACE_HUB_TOKEN:-}}"
# Hugging Face repo slug matches release asset name without .tar.bz2 (e.g. vits-piper-pl_PL-darkman-medium).
HF_MODEL_OWNER="${HF_MODEL_OWNER:-csukuangfj}"
# license_type when all automated sources were tried and none yielded a license (skip on future runs).
LICENSE_EXHAUSTED="${LICENSE_EXHAUSTED:-exhausted}"
81
+
82
# Set of license-like file basenames (keys are lower-case; every value is 1).
declare -A LICENSE_LIKE_BASENAMES=()
for _license_like_name in \
  license license.txt licence licence.txt \
  copying copying.txt notice notice.txt \
  copyright copyright.txt model_license model_license.txt \
  license.md licence.md copying.md notice.md; do
  LICENSE_LIKE_BASENAMES["$_license_like_name"]=1
done
unset _license_like_name

# Per-asset CSV columns, each keyed by asset name.
declare -A existing_asset_name existing_license_type existing_commercial_use
declare -A existing_confidence existing_detection_source existing_license_file
95
+
96
#######################################
# Load an existing license-status CSV into the existing_* associative arrays.
# Column order: asset_name, license_type, commercial_use, confidence,
# detection_source, license_file. The first line is skipped as the header.
# One pair of surrounding double quotes is removed from each field; any
# comma-separated pieces beyond the sixth column are re-joined into
# license_file so a path containing commas is not truncated.
# Globals:   existing_asset_name, existing_license_type,
#            existing_commercial_use, existing_confidence,
#            existing_detection_source, existing_license_file (written)
# Arguments: $1 - CSV path; a missing file is not an error
# Returns:   0
#######################################
read_csv() {
  local csv_path="$1"
  [[ -f "$csv_path" ]] || return 0

  local line asset_name license_type commercial_use confidence
  local detection_source license_file remainder
  local is_header=1

  # "|| [[ -n $line ]]" keeps the final row when the file lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Strip the carriage return once for the whole line (CRLF files).
    line="${line%$'\r'}"
    if (( is_header )); then
      is_header=0
      continue
    fi

    IFS=, read -r asset_name license_type commercial_use confidence \
      detection_source license_file remainder <<< "$line"
    if [[ -n "$remainder" ]]; then
      license_file="${license_file},${remainder}"
    fi

    # Strip quotes.
    asset_name="${asset_name%\"}"; asset_name="${asset_name#\"}"
    [[ -n "$asset_name" ]] || continue
    license_type="${license_type%\"}"; license_type="${license_type#\"}"
    commercial_use="${commercial_use%\"}"; commercial_use="${commercial_use#\"}"
    confidence="${confidence%\"}"; confidence="${confidence#\"}"
    detection_source="${detection_source%\"}"; detection_source="${detection_source#\"}"
    license_file="${license_file%\"}"; license_file="${license_file#\"}"

    existing_asset_name["$asset_name"]="$asset_name"
    existing_license_type["$asset_name"]="$license_type"
    existing_commercial_use["$asset_name"]="$commercial_use"
    existing_confidence["$asset_name"]="$confidence"
    existing_detection_source["$asset_name"]="$detection_source"
    existing_license_file["$asset_name"]="$license_file"
  done < "$csv_path"
}
131
+
132
read_csv "$CSV_FILE"

# Row count for logging (avoid ${#assoc[@]} on empty assoc under set -u on some Bash versions).
existing_csv_rows=0
if [[ -f "$CSV_FILE" ]]; then
  # grep -c prints the count even when it is 0 but then exits 1, so only the
  # exit status is neutralised; echoing a fallback "0" would produce "0\n0"
  # and break the arithmetic below.
  existing_csv_rows="$(grep -cve '^[[:space:]]*$' "$CSV_FILE" 2>/dev/null || true)"
  if [[ ! "$existing_csv_rows" =~ ^[0-9]+$ ]]; then
    existing_csv_rows=0
  fi
  if (( existing_csv_rows > 0 )); then
    existing_csv_rows=$(( existing_csv_rows - 1 )) # minus header
  fi
fi

echo "=== update_model_license_csv.sh ==="
echo "CSV path: $CSV_FILE"
if [[ -n "$STREAM_ID" ]]; then
  echo "Stream id: $STREAM_ID"
fi
echo "ASR license lookup (QNN fallback): $ASR_LICENSE_CSV"
echo "Existing data rows in CSV (excl. header, by line count): $existing_csv_rows"

declare -a release_assets=()
declare -A asset_urls=()

if [[ -f "$ASSET_LIST" ]]; then
  # Each line is "name|url". "|| [[ -n $name ]]" keeps a final line that has
  # no trailing newline.
  while IFS='|' read -r name url || [[ -n "$name" ]]; do
    name="${name%$'\r'}"
    url="${url%$'\r'}"
    # Trim surrounding whitespace with parameter expansion; unlike xargs this
    # cannot fail on quotes or backslashes inside the value.
    name="${name#"${name%%[![:space:]]*}"}"
    name="${name%"${name##*[![:space:]]}"}"
    url="${url#"${url%%[![:space:]]*}"}"
    url="${url%"${url##*[![:space:]]}"}"
    if [[ -n "$name" ]]; then
      release_assets+=("$name")
      asset_urls["$name"]="$url"
      # Assets not yet in the CSV get a fresh row with empty fields.
      if [[ -z "${existing_asset_name["$name"]:-}" ]]; then
        existing_asset_name["$name"]="$name"
        existing_license_type["$name"]=""
        existing_commercial_use["$name"]=""
        existing_confidence["$name"]=""
        existing_detection_source["$name"]=""
        existing_license_file["$name"]=""
      fi
    fi
  done < "$ASSET_LIST"
fi

echo "Asset list file: ${ASSET_LIST:-<none>}"
echo "Tree cache dir: $TREE_CACHE_DIR"
echo "Release assets to consider: ${#release_assets[@]}"
if [[ ${#release_assets[@]} -eq 0 ]]; then
  echo "Note: empty asset list — output CSV will only contain header plus any assets already in CSV but not on release (sorted)."
fi
echo "--- per-asset license pass ---"
180
+
181
#######################################
# Flatten a name by replacing every "/" and "\" with "-".
# Arguments: $1 - name to flatten
# Outputs:   the flattened name followed by a newline on stdout
#######################################
get_safe_name() {
  local name="$1"
  name="${name//\//-}"
  name="${name//\\/-}"
  # printf rather than echo: echo would treat values such as "-n" or "-e" as options.
  printf '%s\n' "$name"
}
187
+
188
#######################################
# Mark one asset as exhausted: license_type becomes $LICENSE_EXHAUSTED,
# commercial_use "unknown", confidence "high", detection_source
# "scan_exhausted", and license_file is cleared.
# Globals:   LICENSE_EXHAUSTED (read); existing_* arrays (written)
# Arguments: $1 - asset name (row key)
#######################################
set_exhausted() {
  local asset="$1"
  existing_license_type["$asset"]="$LICENSE_EXHAUSTED"
  existing_license_file["$asset"]=""
  existing_detection_source["$asset"]="scan_exhausted"
  existing_confidence["$asset"]="high"
  existing_commercial_use["$asset"]="unknown"
}
196
+
197
#######################################
# Store a license result for one asset; detection_source is always
# "archive_license_file".
# Globals:   existing_* arrays (written)
# Arguments: $1 - asset name (row key)
#            $2 - license_type
#            $3 - commercial_use
#            $4 - confidence
#            $5 - license_file value
#######################################
set_detected() {
  local asset="$1" detected_type="$2" commercial="$3" certainty="$4" license_path="$5"

  existing_license_type["$asset"]="$detected_type"
  existing_commercial_use["$asset"]="$commercial"
  existing_confidence["$asset"]="$certainty"
  existing_detection_source["$asset"]="archive_license_file"
  existing_license_file["$asset"]="$license_path"
}
209
+
210
#######################################
# Store a license result for one asset together with the page URL it came from.
# Globals:   existing_* arrays (written)
# Arguments: $1 - asset name (row key)
#            $2 - license_type
#            $3 - commercial_use
#            $4 - confidence
#            $5 - page URL, stored as license_file
#            $6 - detection_source (optional; default huggingface_model_card)
#######################################
set_hf_model_card() {
  local asset="$1" detected_type="$2" commercial="$3" certainty="$4" source_url="$5"
  local source_tag="${6:-huggingface_model_card}"

  existing_license_type["$asset"]="$detected_type"
  existing_commercial_use["$asset"]="$commercial"
  existing_confidence["$asset"]="$certainty"
  existing_detection_source["$asset"]="$source_tag"
  existing_license_file["$asset"]="$source_url"
}
223
+
224
#######################################
# Fetch one raw file from the main branch of a Hugging Face repo.
# curl's stderr is discarded (a 404 on MODEL_CARD is expected).
# Globals:   HF_MODEL_OWNER, _HF_TOKEN (read)
# Arguments: $1 - repo slug under HF_MODEL_OWNER
#            $2 - file name inside the repo
# Outputs:   file body on stdout
# Returns:   curl's exit status (0 on HTTP success)
#######################################
fetch_hf_repo_file() {
  local slug="$1" filename="$2"
  local raw_url="https://huggingface.co/${HF_MODEL_OWNER}/${slug}/raw/main/${filename}"

  # Send the bearer header only when a token is configured.
  if [[ -n "$_HF_TOKEN" ]]; then
    curl -sfSL -H "Authorization: Bearer ${_HF_TOKEN}" "$raw_url" 2>/dev/null
  else
    curl -sfSL "$raw_url" 2>/dev/null
  fi
}
235
+
236
#######################################
# Extract the value of the first non-empty "* License: value" line
# (the label may be spelled "License" or "license").
# The value is whitespace-trimmed, inner whitespace runs are collapsed to a
# single space, and one pair of surrounding quotes is removed.
# Arguments: $1 - model card text
# Outputs:   the license value on stdout, without a trailing newline
# Returns:   0 when a value was found, 1 otherwise
#######################################
parse_model_card_license_field() {
  local card="$1"
  local line lic
  local -a words
  # Fixed word-splitting set for the normalisation below.
  local IFS=$' \t\n'

  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}"
    [[ "$line" =~ ^[*][[:space:]]*[Ll]icense:[[:space:]]*(.*) ]] || continue
    lic="${BASH_REMATCH[1]}"

    # Trim and collapse whitespace in pure Bash. The previous xargs-based trim
    # failed on an unmatched apostrophe or quote and dropped the whole value.
    words=()
    read -r -a words <<< "$lic"
    lic="${words[*]-}"
    if [[ "$lic" == \"*\" || "$lic" == \'*\' ]]; then
      lic="${lic:1:${#lic}-2}"
    fi

    if [[ -n "$lic" ]]; then
      printf '%s' "$lic"
      return 0
    fi
  done <<< "$card"
  return 1
}
253
+
254
# Print the normalised value of a "license: value" / "License: value" line.
# Returns 1 when the line is not such a line or its value is empty.
_readme_license_value_from_line() {
  local line="$1" val
  local -a words=()
  local IFS=$' \t\n'

  [[ "$line" =~ ^[Ll]icense:[[:space:]]*(.*) ]] || return 1
  # Trim and collapse whitespace in pure Bash; the previous xargs-based trim
  # failed on an unmatched apostrophe or quote and dropped the whole value.
  read -r -a words <<< "${BASH_REMATCH[1]}"
  val="${words[*]-}"
  val="${val#\"}"; val="${val%\"}"
  val="${val#\'}"; val="${val%\'}"
  [[ -n "$val" ]] || return 1
  printf '%s' "$val"
}

#######################################
# Extract a license value from README text.
# Hugging Face model cards often use YAML front matter:
#   ---\nlicense: apache-2.0\n---
# Pass 1 looks only between the first "---" line and the next one.
# Pass 2 (fallback) accepts a "license:" line at the start of any line.
# Arguments: $1 - README text
# Outputs:   the license value on stdout, without a trailing newline
# Returns:   0 when a value was found, 1 otherwise
#######################################
parse_readme_yaml_license_field() {
  local readme="$1"
  local line in_fm=0

  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}"
    if [[ "$line" == "---" ]]; then
      if (( in_fm )); then
        break # closing delimiter: front matter is over
      fi
      in_fm=1
      continue
    fi
    if (( in_fm )) && _readme_license_value_from_line "$line"; then
      return 0
    fi
  done <<< "$readme"

  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}"
    if _readme_license_value_from_line "$line"; then
      return 0
    fi
  done <<< "$readme"
  return 1
}
296
+
297
#######################################
# Print the first modelscope.cn/models/… URL found in the text
# (HF README often links here without YAML license).
# A URL with an http(s) scheme is preferred; otherwise a scheme-less match is
# accepted and prefixed with "https://".
# Arguments: $1 - text to search
# Outputs:   the URL on stdout, without a trailing newline
# Returns:   0 when a URL was found, 1 otherwise
#######################################
extract_first_modelscope_models_url() {
  local readme="$1"
  local url
  local -r models_path_re='modelscope\.cn/models/[A-Za-z0-9_./%-]+'

  # "|| true": a no-match grep must not abort the caller under
  # "set -euo pipefail" before the scheme-less fallback has been tried.
  url="$(grep -oE "https?://(www\.)?${models_path_re}" <<< "$readme" | head -n 1 || true)"
  if [[ -z "$url" ]]; then
    url="$(grep -oE "(www\.)?${models_path_re}" <<< "$readme" | head -n 1 || true)"
    # A match of this pattern never starts with a scheme, so always add one.
    if [[ -n "$url" ]]; then
      url="https://${url}"
    fi
  fi

  [[ -n "$url" ]] || return 1
  printf '%s' "$url"
}
311
+
312
+ # ModelScope model pages embed JSON in HTML; License field uses escaped quotes: \"License\":\"Apache License 2.0\"
313
#######################################
# Turn a ModelScope model URL into its /summary page URL: drop everything
# from the first "?", drop one trailing "/", then append "/summary" unless
# the URL already ends with it.
# Arguments: $1 - model URL
# Outputs:   normalised URL on stdout, without a trailing newline
#######################################
normalize_modelscope_summary_url() {
  local page="${1%%\?*}"
  page="${page%/}"
  case "$page" in
    */summary) ;;
    *) page="${page}/summary" ;;
  esac
  printf '%s' "$page"
}
322
+
323
#######################################
# Download the /summary page for a ModelScope model URL, sending a fixed
# User-Agent header. curl's stderr is discarded.
# Arguments: $1 - model URL (normalised via normalize_modelscope_summary_url)
# Outputs:   page body on stdout
# Returns:   curl's exit status
#######################################
fetch_modelscope_summary_html() {
  local summary_url
  summary_url="$(normalize_modelscope_summary_url "$1")"
  curl -sfSL \
    -A "Mozilla/5.0 (compatible; react-native-sherpa-onnx-license-update/1.0)" \
    "$summary_url" 2>/dev/null
}
330
+
331
#######################################
# Extract the License value from ModelScope page HTML.
# The escaped-quote form \"License\":\"…\" is tried first, then the plain
# form "License":"…". For each form only the first matching line counts.
# The value is whitespace-trimmed and inner whitespace runs are collapsed.
# Arguments: $1 - HTML text
# Outputs:   the license value on stdout, without a trailing newline
# Returns:   0 when a value was found, 1 otherwise
#######################################
parse_modelscope_license_from_html() {
  local html="$1"
  local sed_expr lic
  local -a words
  local IFS=$' \t\n'

  for sed_expr in \
    's/.*License\\":\\"\([^\\]*\)\\".*/\1/p' \
    's/.*"License":"\([^"]*\)".*/\1/p'; do
    lic="$(printf '%s' "$html" | sed -n "$sed_expr" | head -n 1 || true)"
    # Trim and collapse whitespace in pure Bash; the previous xargs-based trim
    # failed on an unmatched apostrophe or quote and dropped the whole value.
    words=()
    read -r -a words <<< "$lic"
    lic="${words[*]-}"
    if [[ -n "$lic" ]]; then
      printf '%s' "$lic"
      return 0
    fi
  done
  return 1
}
348
+
349
#######################################
# Succeed when the asset name matches vits-piper-*.tar.bz2 or
# sherpa-onnx-*.tar.bz2.
# Arguments: $1 - asset name
# Returns:   0 on a match, 1 otherwise
#######################################
asset_eligible_for_hf_license_fallback() {
  case "$1" in
    vits-piper-*.tar.bz2 | sherpa-onnx-*.tar.bz2)
      return 0
      ;;
    *)
      return 1
      ;;
  esac
}
353
+
354
#######################################
# Print a log label for the detection_source currently stored for an asset
# (set by a successful try_hf_model_card_fallback).
# Globals:   existing_detection_source (read)
# Arguments: $1 - asset name
# Outputs:   one label line on stdout
#######################################
log_license_fallback_source() {
  local source_tag="${existing_detection_source["$1"]:-}"

  if [[ "$source_tag" == "modelscope_detail_json" ]]; then
    echo "ModelScope (via link in Hugging Face README)"
  elif [[ "$source_tag" == "huggingface_model_card" ]]; then
    echo "Hugging Face (MODEL_CARD or README)"
  else
    echo "online metadata"
  fi
}
369
+
370
#######################################
# Online license lookup for one asset.
# Order: Hugging Face MODEL_CARD, then README.md; only when neither yields a
# license and the README was fetched, follow its first modelscope.cn link.
# On success the result is stored through set_hf_model_card. When
# detect_license reports "unknown", the raw license text is stored instead
# with commercial_use "unknown" and confidence "medium".
# Globals:   HF_MODEL_OWNER (read); existing_* arrays (written via
#            set_hf_model_card)
# Arguments: $1 - asset name
# Returns:   0 when a license was recorded; 1 when the asset is not eligible
#            or no license was found
#######################################
try_hf_model_card_fallback() {
  local asset_name="$1"
  local slug page_url card readme raw_lic det l_res c_res conf_res
  local license_ref_url license_ref_src ms_url ms_html ms_raw

  if ! asset_eligible_for_hf_license_fallback "$asset_name"; then
    return 1
  fi

  # The repo slug is the asset name without its .tar.bz2 suffix.
  slug="${asset_name%.tar.bz2}"
  page_url="https://huggingface.co/${HF_MODEL_OWNER}/${slug}"
  license_ref_url="$page_url"
  license_ref_src="huggingface_model_card"
  readme=""
  raw_lic=""

  if card="$(fetch_hf_repo_file "$slug" "MODEL_CARD")"; then
    raw_lic="$(parse_model_card_license_field "$card")" || raw_lic=""
  fi
  if [[ -z "$raw_lic" ]] && readme="$(fetch_hf_repo_file "$slug" "README.md")"; then
    raw_lic="$(parse_readme_yaml_license_field "$readme")" || raw_lic=""
  fi

  # ModelScope only when HF did not yield a license (README must have been fetched and link MS).
  if [[ -z "$raw_lic" && -n "$readme" ]]; then
    if ms_url="$(extract_first_modelscope_models_url "$readme")"; then
      ms_html="$(fetch_modelscope_summary_html "$ms_url")" || ms_html=""
      if [[ -n "$ms_html" ]] && ms_raw="$(parse_modelscope_license_from_html "$ms_html")"; then
        raw_lic="$ms_raw"
        license_ref_url="$(normalize_modelscope_summary_url "$ms_url")"
        license_ref_src="modelscope_detail_json"
      fi
    fi
  fi

  if [[ -z "$raw_lic" ]]; then
    return 1
  fi

  # detect_license output is split on "|" into type, commercial use, confidence.
  det="$(detect_license "$raw_lic")"
  l_res="$(echo "$det" | cut -d'|' -f1)"
  c_res="$(echo "$det" | cut -d'|' -f2)"
  conf_res="$(echo "$det" | cut -d'|' -f3)"

  if [[ "$l_res" != "unknown" ]]; then
    set_hf_model_card "$asset_name" "$l_res" "$c_res" "$conf_res" "$license_ref_url" "$license_ref_src"
  else
    set_hf_model_card "$asset_name" "$raw_lic" "unknown" "medium" "$license_ref_url" "$license_ref_src"
  fi
  return 0
}
418
+
419
#######################################
# Decide whether the QNN fallback (mirroring a license row from
# asr-models-license-status.csv, used as a last resort) applies.
# It applies when the stream id is "asr-models-qnn-binary", or the CSV file
# is named qnn-asr-models-license-status.csv, or the asset name contains
# "qnn" in any letter case.
# Globals:   STREAM_ID, CSV_FILE (read)
# Arguments: $1 - asset name
# Returns:   0 when the fallback applies, 1 otherwise
#######################################
qnn_license_fallback_context() {
  if [[ "${STREAM_ID:-}" == "asr-models-qnn-binary" ]]; then
    return 0
  fi
  if [[ "$(basename "$CSV_FILE")" == "qnn-asr-models-license-status.csv" ]]; then
    return 0
  fi
  case "$1" in
    *[Qq][Nn][Nn]*) return 0 ;;
  esac
  return 1
}
426
+
427
#######################################
# Remove the "sherpa-onnx-qnn-<soc>-binary-<n>-seconds-" prefix from a QNN
# asset name.
# Arguments: $1 - asset name
# Outputs:   the remainder after the prefix on stdout, no trailing newline
# Returns:   0 when the prefix matched, 1 otherwise (nothing printed)
#######################################
strip_qnn_binary_asset_prefix() {
  local asset="$1"
  local prefix_re='^sherpa-onnx-qnn-[^-]+-binary-[0-9]+-seconds-(.+)$'

  if [[ ! "$asset" =~ $prefix_re ]]; then
    return 1
  fi
  echo -n "${BASH_REMATCH[1]}"
  return 0
}
435
+
436
#######################################
# Print the first CSV data row whose first field equals the wanted asset name.
# The header line is skipped. Before comparing, a trailing CR and surrounding
# double quotes are removed from the first field, so quoted rows match the
# same way they do for the other CSV readers in this script.
# Arguments: $1 - CSV path
#            $2 - asset name to look up
# Outputs:   the matching row, unmodified, on stdout; nothing when none matches
# Returns:   1 when the CSV file does not exist, otherwise awk's exit status
#######################################
asr_license_csv_row_for_asset_name() {
  local csv="$1"
  local want="$2"
  [[ -f "$csv" ]] || return 1
  awk -F',' -v n="$want" '
    NR == 1 { next }
    {
      key = $1
      sub(/\r$/, "", key)
      gsub(/^"|"$/, "", key)
      if (key == n) { print; exit }
    }
  ' "$csv"
}
450
+
451
# Apply an ASR CSV line to a QNN asset (same 6 columns as our CSV; the QNN
# asset name stays the row key, the ASR asset name in column 1 is discarded).
# Globals:   existing_license_type, existing_commercial_use,
#            existing_confidence, existing_detection_source,
#            existing_license_file (associative arrays, written)
# Arguments: $1 - QNN asset name (key to write under)
#            $2 - raw CSV row copied from the ASR license CSV
apply_asr_license_line_to_qnn_asset() {
  local target="$1"
  local row="${2%$'\r'}"
  local src_asset lic_type comm_use conf_level det_src lic_file extra
  local field value

  IFS=',' read -r src_asset lic_type comm_use conf_level det_src lic_file extra <<< "$row"

  # Anything after the sixth comma belongs to license_file (it may itself
  # contain commas), so glue it back on.
  if [[ -n "${extra:-}" ]]; then
    lic_file="${lic_file},${extra}"
  fi

  # Drop at most one enclosing double quote from each side of every value.
  for field in lic_type comm_use conf_level det_src lic_file; do
    value="${!field}"
    value="${value%\"}"
    value="${value#\"}"
    printf -v "$field" '%s' "$value"
  done

  existing_license_type["$target"]="$lic_type"
  existing_commercial_use["$target"]="$comm_use"
  existing_confidence["$target"]="$conf_level"
  existing_detection_source["$target"]="$det_src"
  existing_license_file["$target"]="$lic_file"
}
472
+
473
# Set on successful try_qnn_asr_license_fallback (do not capture that function
# in $(…): a subshell drops the assoc-array updates).
_QNN_ASR_MIRROR_MATCHED=""

# Last-resort license lookup for QNN binary assets: derive the name of the
# matching ASR asset and copy its license row from the ASR license CSV.
#
# Candidate ASR names, tried in order (duplicates skipped):
#   1. the QNN name with its "sherpa-onnx-qnn-…-seconds-" prefix removed
#   2. the same with a "sherpa-onnx-" prefix, if it does not have one already
#   3. for "*-int8.tar.bz2": the non-quantized archive name, again carrying
#      exactly one "sherpa-onnx-" prefix
#
# Globals:   ASR_LICENSE_CSV (read), _QNN_ASR_MIRROR_MATCHED (written),
#            existing_* arrays (written via apply_asr_license_line_to_qnn_asset)
# Arguments: $1 - QNN asset name
# Returns:   0 when a row was mirrored, 1 when the fallback does not apply or
#            no candidate matched.
try_qnn_asr_license_fallback() {
  local asset_name="$1"
  local derived cand stem row="" matched_asr=""
  local -a cands=()
  local -A tried=()

  _QNN_ASR_MIRROR_MATCHED=""
  qnn_license_fallback_context "$asset_name" || return 1
  derived="$(strip_qnn_binary_asset_prefix "$asset_name")" || return 1

  cands+=("$derived")
  if [[ "$derived" != sherpa-onnx-* ]]; then
    cands+=("sherpa-onnx-${derived}")
  fi
  if [[ "$derived" == *-int8.tar.bz2 ]]; then
    stem="${derived%-int8.tar.bz2}"
    # Only add the prefix when it is missing; blindly prepending it produced
    # "sherpa-onnx-sherpa-onnx-…", which can never match an ASR row.
    if [[ "$stem" == sherpa-onnx-* ]]; then
      cands+=("${stem}.tar.bz2")
    else
      cands+=("sherpa-onnx-${stem}.tar.bz2")
    fi
  fi

  for cand in "${cands[@]}"; do
    if [[ -z "$cand" || -n "${tried["$cand"]:-}" ]]; then
      continue
    fi
    tried["$cand"]=1
    row="$(asr_license_csv_row_for_asset_name "$ASR_LICENSE_CSV" "$cand")"
    if [[ -n "$row" ]]; then
      matched_asr="$cand"
      break
    fi
  done

  [[ -n "$row" ]] || return 1
  apply_asr_license_line_to_qnn_asset "$asset_name" "$row"
  _QNN_ASR_MIRROR_MATCHED="$matched_asr"
  return 0
}
506
+
507
# Classify license text (a full license file or a short tag such as "mit").
#
# The text is lower-cased, runs of whitespace are squeezed to single spaces and
# leading/trailing whitespace is removed before matching. Trimming matters for
# the exact-match rule ("mit"): without it, the newline of a one-word tag
# became a trailing space and the rule could never fire.
#
# Arguments: $1 - license text
# Outputs:   "<license_type>|<commercial_use>|<confidence>" on stdout, where
#            commercial_use is yes/conditional/no/unknown and confidence is
#            high/medium/low; "unknown|unknown|low" when nothing matches.
detect_license() {
  local t="$1"
  t="$(printf '%s' "$t" | tr '[:upper:]' '[:lower:]' | tr -s ' \r\n\t' ' ')"
  # After squeezing there is at most one blank on either side.
  t="${t# }"
  t="${t% }"

  if [[ "$t" == *"cc0"* || "$t" == *"cc-0"* || "$t" == *"creative commons zero"* || "$t" == *"public domain dedication"* ]]; then
    echo "cc0|yes|high"
  elif [[ "$t" == *"apache-2.0"* || "$t" == *"apache 2.0"* ]]; then
    echo "apache-2.0|yes|high"
  elif [[ "$t" == *"apache license 2.0"* ]]; then
    echo "apache-2.0|yes|high"
  elif [[ "$t" == *"apache license"* && "$t" == *"version 2.0"* ]]; then
    echo "apache-2.0|yes|high"
  elif [[ "$t" == "mit" || "$t" == *"mit license"* ]]; then
    echo "mit|yes|high"
  elif [[ "$t" == *"bsd 3-clause"* || ( "$t" == *"redistribution and use in source and binary forms"* && "$t" == *"neither the name"* ) ]]; then
    echo "bsd-3-clause|yes|medium"
  elif [[ "$t" == *"bsd 2-clause"* ]]; then
    echo "bsd-2-clause|yes|medium"
  elif [[ "$t" == *"mozilla public license"* && "$t" == *"2.0"* ]]; then
    echo "mpl-2.0|yes|high"
  elif [[ "$t" == *"isc license"* ]]; then
    echo "isc|yes|medium"
  elif [[ "$t" == *"the unlicense"* ]]; then
    echo "unlicense|yes|medium"
  elif [[ "$t" == *"zlib license"* ]]; then
    echo "zlib|yes|medium"
  elif [[ "$t" == *"gnu affero general public license"* ]]; then
    echo "agpl-3.0|conditional|high"
  elif [[ "$t" == *"gnu lesser general public license"* ]]; then
    if [[ "$t" == *"version 2.1"* ]]; then
      echo "lgpl-2.1|conditional|high"
    elif [[ "$t" == *"version 3"* ]]; then
      echo "lgpl-3.0|conditional|high"
    else
      echo "lgpl|conditional|medium"
    fi
  elif [[ "$t" == *"gnu general public license"* ]]; then
    if [[ "$t" == *"version 3"* ]]; then
      echo "gpl-3.0|conditional|high"
    elif [[ "$t" == *"version 2"* ]]; then
      echo "gpl-2.0|conditional|high"
    else
      echo "gpl|conditional|medium"
    fi
  elif [[ "$t" == *"creative commons"* && "$t" == *"noncommercial"* ]]; then
    if [[ "$t" == *"4.0"* ]]; then
      echo "cc-by-nc-4.0|no|high"
    else
      echo "cc-by-nc|no|medium"
    fi
  elif [[ "$t" == *"creative commons attribution 4.0"* || ( "$t" == *"creative commons"* && "$t" == *"attribution"* && "$t" == *"4.0"* ) ]]; then
    echo "cc-by-4.0|yes|high"
  elif [[ "$t" == *"non-commercial"* || "$t" == *"non commercial"* ]]; then
    echo "custom-non-commercial|no|medium"
  elif [[ "$t" == *"research only"* || "$t" == *"for research purposes only"* ]]; then
    echo "custom-research-only|no|medium"
  else
    echo "unknown|unknown|low"
  fi
}
540
+
541
# Strip leading and trailing whitespace from $1 and print the result without a
# trailing newline. Pure parameter expansion: unlike piping through `xargs`,
# it neither chokes on quotes/backslashes nor collapses inner whitespace.
_trim_ws() {
  local s="$1"
  s="${s#"${s%%[![:space:]]*}"}"
  s="${s%"${s##*[![:space:]]}"}"
  printf '%s' "$s"
}

# Shared fallback chain used whenever the archive itself yields no usable
# license: model-card fallback first, then the QNN→ASR mirror, else mark the
# asset exhausted. Logs exactly one line describing the outcome.
# Globals:   existing_license_type (read), _QNN_ASR_MIRROR_MATCHED (read),
#            LICENSE_EXHAUSTED (read)
# Arguments: $1 - asset name
#            $2 - reason text for the model-card success message
#            $3 - reason text for the QNN mirror success message
#            $4 - reason text for the exhausted message
_fill_license_from_fallbacks() {
  local asset="$1" hf_reason="$2" qnn_reason="$3" exhausted_reason="$4"
  if try_hf_model_card_fallback "$asset"; then
    echo " $asset — ${hf_reason} → filled from $(log_license_fallback_source "$asset") (license_type=${existing_license_type["$asset"]})"
    return 0
  fi
  if try_qnn_asr_license_fallback "$asset"; then
    echo " $asset — ${qnn_reason} + HF exhausted → QNN mirror from asr row (${_QNN_ASR_MIRROR_MATCHED}) (license_type=${existing_license_type["$asset"]})"
    return 0
  fi
  set_exhausted "$asset"
  echo " $asset — ${exhausted_reason} + fallbacks exhausted → license_type=$LICENSE_EXHAUSTED"
}

# Main pass: for every release asset that still lacks a license verdict, look
# for a license file in the cached archive listing, download the archive, read
# the first extractable license file and classify it; otherwise use fallbacks.
for asset_name in "${release_assets[@]}"; do
  url="${asset_urls["$asset_name"]}"

  l_type="$(_trim_ws "${existing_license_type["$asset_name"]:-}")"
  l_type_lc="${l_type,,}"
  c_use="$(_trim_ws "${existing_commercial_use["$asset_name"]:-}")"
  c_use_lc="${c_use,,}"
  det_src="$(_trim_ws "${existing_detection_source["$asset_name"]:-}")"
  det_src_lc="${det_src,,}"

  if [[ "$det_src_lc" == "manual" ]]; then
    echo " $asset_name — skip (detection_source=manual)"
    continue
  fi
  if [[ "$l_type_lc" == "exhausted" ]]; then
    echo " $asset_name — skip (license_type=exhausted; clear to re-run automation)"
    continue
  fi
  # Only (re)fill when license_type or commercial_use is empty or explicitly unknown.
  if [[ -n "$l_type" && "$l_type_lc" != "unknown" && -n "$c_use" && "$c_use_lc" != "unknown" ]]; then
    echo " $asset_name — skip (license_type and commercial_use already set)"
    continue
  fi

  if [[ "$asset_name" == *.onnx ]]; then
    set_exhausted "$asset_name"
    echo " $asset_name — .onnx bundle → license_type=$LICENSE_EXHAUSTED (no archive; skipped next run)"
    continue
  fi

  safe_name="$(get_safe_name "$asset_name")"
  tree_path="${TREE_CACHE_DIR}/${safe_name}.txt"

  # Collect license-looking members from the cached archive listing, keeping
  # first-seen order and dropping duplicates and directory entries.
  declare -a license_paths=()
  if [[ -f "$tree_path" ]]; then
    declare -A seen_paths=()
    # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      s="$(_trim_ws "${line%$'\r'}")"
      if [[ -z "$s" || "$s" == */ ]]; then continue; fi

      base="${s##*/}"
      base_lower="${base,,}"

      if [[ -n "${LICENSE_LIKE_BASENAMES["$base_lower"]:-}" \
        || "$base_lower" == *"license"* || "$base_lower" == *"licence"* ]]; then
        if [[ -z "${seen_paths["$s"]:-}" ]]; then
          license_paths+=("$s")
          seen_paths["$s"]=1
        fi
      fi
    done < "$tree_path"
    unset seen_paths
  fi

  if [[ ${#license_paths[@]} -eq 0 ]]; then
    _fill_license_from_fallbacks "$asset_name" \
      "no license in tree" "no license in tree" "no license in tree"
    continue
  fi

  echo " $asset_name — found ${#license_paths[@]} license-like path(s), downloading archive…"
  td="$(mktemp -d -t model-license-XXXXXX)"
  archive_path="${td}/${safe_name}"

  # -f: treat HTTP errors as failures instead of saving the error page as the
  # "archive" and only noticing later when tar cannot read it.
  _curl_dl=(-fsSL)
  if [[ -n "$_GH_TOKEN" && "$url" == *"github.com"* ]]; then
    _curl_dl+=(-H "Authorization: Bearer ${_GH_TOKEN}" -H "Accept: application/octet-stream")
  fi
  if ! curl "${_curl_dl[@]}" -o "$archive_path" "$url"; then
    rm -rf -- "${td:?}"
    _fill_license_from_fallbacks "$asset_name" \
      "download failed" "download failed" "download failed"
    continue
  fi

  extracted_text=""
  used_file="${license_paths[0]}"
  for p in "${license_paths[@]}"; do
    # The listing and the archive may disagree on a leading "./"; try both.
    if [[ "$p" == ./* ]]; then
      member_candidates=("$p" "${p:2}")
    else
      member_candidates=("$p" "./$p")
    fi

    for c in "${member_candidates[@]}"; do
      if [[ -z "$c" ]]; then continue; fi
      # Avoid bash "ignored null byte" from $(...) and cap size (wrong member / binary).
      out="$(
        tar -xOf "$archive_path" "$c" 2>/dev/null | head -c 524288 | tr -d '\000' || true
      )"
      if [[ -n "$out" ]]; then
        extracted_text="$out"
        used_file="$p"
        break 2
      fi
    done
  done

  # The archive is no longer needed once the text has been read (or not).
  rm -rf -- "${td:?}"

  if [[ -z "$extracted_text" ]]; then
    _fill_license_from_fallbacks "$asset_name" \
      "could not extract license file" "could not extract license" "could not extract license file"
    continue
  fi

  det="$(detect_license "$extracted_text")"
  IFS='|' read -r l_res c_res conf_res <<< "$det"

  if [[ "$l_res" == "unknown" ]]; then
    _fill_license_from_fallbacks "$asset_name" \
      "archive license text unknown" "archive text unknown" "archive text unclassified"
    continue
  fi

  set_detected "$asset_name" "$l_res" "$c_res" "$conf_res" "$used_file"
  echo " $asset_name — detected license_type=$l_res commercial_use=$c_res confidence=$conf_res file=$used_file"
done
704
+
705
# Rewrite the license CSV: header, then one row per release asset (first
# occurrence only, in release order), then any rows that were already in the
# CSV but are no longer part of the release (sorted by name).
echo "--- writing CSV ---"
mkdir -p "$(dirname "$CSV_FILE")"

declare -A out_seen=()
{
  echo "asset_name,license_type,commercial_use,confidence,detection_source,license_file"
  for name in "${release_assets[@]}"; do
    if [[ -n "${out_seen["$name"]:-}" ]]; then
      continue
    fi
    echo "${name},${existing_license_type["$name"]:-},${existing_commercial_use["$name"]:-},${existing_confidence["$name"]:-},${existing_detection_source["$name"]:-},${existing_license_file["$name"]:-}"
    out_seen["$name"]=1
  done
} > "$CSV_FILE"

declare -a remaining=()
# Empty assoc: ${!existing_asset_name[@]} can trip `set -u` on some Bash builds.
declare -a existing_asset_keys=()
set +u
existing_asset_keys=("${!existing_asset_name[@]}")
set -u
for name in "${existing_asset_keys[@]}"; do
  if [[ -n "${out_seen["$name"]:-}" ]]; then
    continue
  fi
  remaining+=("$name")
done

if [[ ${#remaining[@]} -gt 0 ]]; then
  echo "Appending ${#remaining[@]} asset(s) present in CSV but not in current release asset list."
  mapfile -t remaining_sorted < <(printf "%s\n" "${remaining[@]}" | sort)
  for name in "${remaining_sorted[@]}"; do
    echo "${name},${existing_license_type["$name"]:-},${existing_commercial_use["$name"]:-},${existing_confidence["$name"]:-},${existing_detection_source["$name"]:-},${existing_license_file["$name"]:-}"
  done >> "$CSV_FILE"
fi

out_lines=$(wc -l < "$CSV_FILE" | tr -d ' ')
echo "Done. Wrote $CSV_FILE ($out_lines lines including header)."
739
+
740
# Keep Android and iOS bundled copies identical (paths relative to repo root).
# When --csv already points at one of the bundled paths, skip copying the file
# onto itself (cp errors out on "same file").
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Succeeds when both paths exist and refer to the same file.
# Uses the -ef test, so the check also holds when the two paths reach the file
# through different symlinks or hard links — cases a comparison of
# "cd … && pwd" strings misses. A path that does not exist yet can never be
# "the same file", so copying onto it is safe.
# Arguments: $1, $2 - paths to compare
same_canonical_path() {
  [[ -e "$1" && -e "$2" && "$1" -ef "$2" ]]
}

if [[ -d "$REPO_ROOT/android" && -d "$REPO_ROOT/ios" && -f "$CSV_FILE" ]]; then
  _bn="$(basename "$CSV_FILE")"
  _android_dir="$REPO_ROOT/android/src/main/assets/model_licenses"
  _ios_dir="$REPO_ROOT/ios/Resources/model_licenses"
  _android_target="$_android_dir/$_bn"
  _ios_target="$_ios_dir/$_bn"
  mkdir -p "$_android_dir" "$_ios_dir"
  if ! same_canonical_path "$CSV_FILE" "$_android_target"; then
    cp -- "$CSV_FILE" "$_android_target"
  fi
  if ! same_canonical_path "$CSV_FILE" "$_ios_target"; then
    cp -- "$CSV_FILE" "$_ios_target"
  fi
  echo "Synced $_bn → android/src/main/assets/model_licenses/ and ios/Resources/model_licenses/"
fi