@onekeyfe/react-native-background-thread 3.0.63 → 3.0.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@
4
4
  #include <atomic>
5
5
  #include <chrono>
6
6
  #include <condition_variable>
7
+ #include <cstdio>
7
8
  #include <deque>
8
9
  #include <functional>
9
10
  #include <memory>
@@ -79,6 +80,11 @@ static constexpr size_t kRuntimeQueueWarnInterval = 128;
79
80
  struct RuntimeWorkQueue {
80
81
  std::deque<std::function<void(jsi::Runtime &)>> items;
81
82
  bool drainScheduled = false;
83
+ // workId of the drain currently posted to gPendingWork (valid only while
84
+ // drainScheduled == true). Tracked so a teardown path (nativeInvalidate
85
+ // SharedRpc) can erase the orphaned gPendingWork entry if its posted drain
86
+ // is dropped during reload. -1 means "none outstanding".
87
+ int64_t scheduledDrainWorkId = -1;
82
88
  };
83
89
 
84
90
  static RuntimeWorkQueue gMainRuntimeWorkQueue;
@@ -88,6 +94,20 @@ static RuntimeWorkQueue &getRuntimeWorkQueue(bool isMain) {
88
94
  return isMain ? gMainRuntimeWorkQueue : gBgRuntimeWorkQueue;
89
95
  }
90
96
 
97
+ // Caller MUST hold gWorkMutex. Intentionally leak (abandon) each queued functor
98
+ // — its ~jsi::Function must not run off the JS thread / on a dead runtime — then
99
+ // clear the queue and disarm the drain latch so a recovered runtime re-arms a
100
+ // fresh drain on the next enqueue instead of stranding work behind a stale
101
+ // drainScheduled==true.
102
+ static void leakAndClearRuntimeQueue(RuntimeWorkQueue &queue) {
103
+ for (auto &work : queue.items) {
104
+ new std::function<void(jsi::Runtime &)>(std::move(work));
105
+ }
106
+ queue.items.clear();
107
+ queue.drainScheduled = false;
108
+ queue.scheduledDrainWorkId = -1;
109
+ }
110
+
91
111
  static bool callScheduleOnJSThread(const JavaObjectRef &ref, bool isMain, int64_t workId) {
92
112
  JNIEnv *env = getJNIEnv();
93
113
  if (!env || !ref) {
@@ -133,6 +153,8 @@ static bool callScheduleOnJSThread(const JavaObjectRef &ref, bool isMain, int64_
133
153
  return scheduled == JNI_TRUE;
134
154
  }
135
155
 
156
+ static void drainPendingBgEvals(const std::string &reason);
157
+
136
158
  static void scheduleRuntimeDrain(const JavaObjectRef &ref, bool isMain);
137
159
 
138
160
  static void drainRuntimeWorkQueue(jsi::Runtime &rt, JavaObjectRef ref, bool isMain) {
@@ -170,6 +192,9 @@ static void drainRuntimeWorkQueue(jsi::Runtime &rt, JavaObjectRef ref, bool isMa
170
192
  remaining = queue.items.size();
171
193
  if (remaining == 0) {
172
194
  queue.drainScheduled = false;
195
+ // This drain's gPendingWork entry was already erased by
196
+ // nativeExecuteWork before it ran; its workId is now stale.
197
+ queue.scheduledDrainWorkId = -1;
173
198
  } else {
174
199
  shouldReschedule = true;
175
200
  }
@@ -190,8 +215,24 @@ static void scheduleRuntimeDrain(const JavaObjectRef &ref, bool isMain) {
190
215
  size_t queued = 0;
191
216
  {
192
217
  std::lock_guard<std::mutex> lock(gWorkMutex);
218
+ auto &queue = getRuntimeWorkQueue(isMain);
219
+ // Stale-id guard: drainRuntimeWorkQueue observes remaining>0, drops the
220
+ // lock, then calls us — but a concurrent nativeInvalidateSharedRpc can
221
+ // clear the queue + latch in between. If the queue is now empty there is
222
+ // nothing to drain: do NOT post a gPendingWork entry / scheduleOnJSThread
223
+ // for an already-drained/invalidated queue. Just disarm the latch and
224
+ // return. The normal enqueue→schedule path always has items.size()>=1, so
225
+ // this never short-circuits it.
226
+ if (queue.items.empty()) {
227
+ queue.drainScheduled = false;
228
+ queue.scheduledDrainWorkId = -1;
229
+ return;
230
+ }
193
231
  workId = gNextWorkId++;
194
- queued = getRuntimeWorkQueue(isMain).items.size();
232
+ queued = queue.items.size();
233
+ // Track the outstanding drain's workId so a teardown path can erase its
234
+ // orphaned gPendingWork entry if the post is dropped during reload.
235
+ queue.scheduledDrainWorkId = workId;
195
236
  gPendingWork[workId] = [ref, isMain](jsi::Runtime &rt) {
196
237
  drainRuntimeWorkQueue(rt, ref, isMain);
197
238
  };
@@ -199,11 +240,23 @@ static void scheduleRuntimeDrain(const JavaObjectRef &ref, bool isMain) {
199
240
 
200
241
  bool scheduled = callScheduleOnJSThread(ref, isMain, workId);
201
242
  if (!scheduled) {
202
- std::lock_guard<std::mutex> lock(gWorkMutex);
203
- gPendingWork.erase(workId);
204
- getRuntimeWorkQueue(isMain).drainScheduled = false;
205
- LOGE("executor: failed to schedule runtime drain isMain=%d, workId=%ld, queued=%zu",
206
- isMain, (long)workId, queued);
243
+ {
244
+ std::lock_guard<std::mutex> lock(gWorkMutex);
245
+ gPendingWork.erase(workId);
246
+ leakAndClearRuntimeQueue(getRuntimeWorkQueue(isMain));
247
+ LOGE("executor: failed to schedule runtime drain isMain=%d, workId=%ld, queued=%zu",
248
+ isMain, (long)workId, queued);
249
+ }
250
+ // The bg JS thread is unreachable, so the queued bg-eval lambdas will
251
+ // never run. Settle any in-flight bg eval (retryable NO_RUNTIME) so its
252
+ // JNI global ref is released and the JS promise resolves now instead of
253
+ // hanging on the Kotlin 30s watchdog. Gated on !isMain — a main-thread
254
+ // schedule hiccup must never falsely reject healthy bg evals. Called
255
+ // outside gWorkMutex: drainPendingBgEvals takes gBgEvalMutex and does a
256
+ // Java upcall, so it must not run under a native lock.
257
+ if (!isMain) {
258
+ drainPendingBgEvals("Background JS thread unreachable when scheduling segment eval");
259
+ }
207
260
  }
208
261
  }
209
262
 
@@ -607,6 +660,351 @@ static void installTimersOnRuntime(jsi::Runtime &rt) {
607
660
  LOGI("Timer + rAF + rIC polyfills installed on bg runtime");
608
661
  }
609
662
 
663
+ // ── Background segment eval (fix A: eval-then-resolve on the BG runtime) ──
664
+ //
665
+ // WHY THIS EXISTS — the "Requiring unknown module" race on the BACKGROUND
666
+ // runtime:
667
+ // The Kotlin BackgroundThreadManager.registerSegmentInBackground previously
668
+ // called `ReactContext.registerSegment(...)`, which (bridgeless) routes through
669
+ // ReactHostImpl.registerSegment → ReactInstance.registerSegment → C++
670
+ // ReactInstance::registerSegment, and that only `scheduleWork`s the
671
+ // evaluateJavaScript onto the RuntimeScheduler before returning. The Kotlin
672
+ // completion callback then fired immediately, so the JS promise resolved BEFORE
673
+ // the segment's `__d(...)` module definitions ran. Metro's
674
+ // `import().then(() => __r(moduleId))` could then run `__r` before the module
675
+ // table was populated → a fatal, uncatchable "Requiring unknown module". Locale
676
+ // segments load through THIS bg path, so a language switch could still crash.
677
+ //
678
+ // THE FIX (mirrors the MAIN-runtime SplitBundleLoaderJSI fix and the iOS
679
+ // callFunctionOnBufferedRuntimeExecutor: fix):
680
+ // We evaluate the segment OURSELVES on the BACKGROUND JS thread and signal
681
+ // completion in the SAME block, strictly AFTER eval. The accessor we use is the
682
+ // background runtime's own RuntimeExecutor (`gBgTimerExecutor`), captured in
683
+ // nativeInstallSharedBridge for the bg runtime (isMain=false). It dispatches via
684
+ // scheduleOnJSThread(isMain=false, ...) → nativeExecuteWork, which runs the work
685
+ // on the bg JS queue thread and then drainMicrotasks() — so this targets the
686
+ // BACKGROUND runtime, NOT the main one. This is the correct bg analogue of the
687
+ // main path's CallInvoker (the bg runtime is created by ReactHostImpl and the bg
688
+ // ReactContext does not surface a usable jsCallInvokerHolder the way the main
689
+ // one does, but its RuntimeExecutor already exists and routes to the bg JS
690
+ // thread).
691
+ //
692
+ // Off-thread read (fix F): the segment file is read on the CALLING (native
693
+ // module) thread before dispatch; only evaluateJavaScript + completion run on
694
+ // the bg JS thread.
695
+
696
+ // Java callback contract — implemented in Kotlin as
697
+ // BackgroundThreadManager.SegmentEvalCallback.onComplete(error). Empty/null
698
+ // error string → success. A message prefixed with "NO_RUNTIME:" → bg runtime
699
+ // not ready (retryable); "IO_ERROR:" → file read failure (fatal); otherwise →
700
+ // eval throw (fatal). Resolved exactly once on the Kotlin side via its watchdog
701
+ // guard.
702
+ static void invokeBgSegmentCallback(jobject globalCallback, const std::string &error) {
703
+ JNIEnv *env = getJNIEnv();
704
+ if (!env || !globalCallback) {
705
+ return;
706
+ }
707
+ jclass cls = env->GetObjectClass(globalCallback);
708
+ jmethodID mid =
709
+ env->GetMethodID(cls, "onComplete", "(Ljava/lang/String;)V");
710
+ if (mid) {
711
+ jstring jerr = error.empty() ? nullptr : env->NewStringUTF(error.c_str());
712
+ env->CallVoidMethod(globalCallback, mid, jerr);
713
+ if (env->ExceptionCheck()) {
714
+ LOGE("invokeBgSegmentCallback: JNI exception after onComplete");
715
+ env->ExceptionDescribe();
716
+ env->ExceptionClear();
717
+ }
718
+ if (jerr) {
719
+ env->DeleteLocalRef(jerr);
720
+ }
721
+ } else {
722
+ LOGE("invokeBgSegmentCallback: onComplete method not found!");
723
+ if (env->ExceptionCheck()) {
724
+ env->ExceptionDescribe();
725
+ env->ExceptionClear();
726
+ }
727
+ }
728
+ env->DeleteLocalRef(cls);
729
+ }
730
+
731
+ // ── Pending bg-eval callback registry (fix: bounded global-ref lifetime) ──
732
+ //
733
+ // The bg-eval work lambda captures a JNI global ref to the SegmentEvalCallback
734
+ // and is enqueued onto gPendingWork for the bg JS thread. If that work is
735
+ // enqueued but NEVER runs, the captured global ref would leak and the JS promise
736
+ // would settle only via the Kotlin 30s watchdog. The drop paths that release it:
737
+ // - Java schedule fails (JNI exception / missing scheduleOnJSThread): the C++
738
+ // executor erases gPendingWork[workId] and drains this eval.
739
+ // - bg context==null / ptr==0 in scheduleOnJSThread: Kotlin calls
740
+ // nativeDropScheduledWork(workId) — which erases the work AND drains —
741
+ // BEFORE it would call nativeExecuteWork. (nativeExecuteWork's own
742
+ // rt==nullptr guard merely returns and is NOT a drain path; it is never
743
+ // reached for a dead ptr because Kotlin intercepts that case first.)
744
+ // - nativeDestroy: intentionally leaks the work lambdas (their ~jsi::Function
745
+ // can't run on a torn-down runtime) but drains this registry to settle them.
746
+ // To make this strictly bounded, every in-flight bg eval registers here. Whoever
747
+ // settles it first (the work lambda after eval, OR a drain on a drop path) claims
748
+ // it via `settled`, invokes the Java callback exactly once, and deletes the
749
+ // global ref. This guarantees no leak and no double-invoke.
750
+ struct PendingBgEval {
751
+ jobject globalCallback; // owned: deleted by whoever settles
752
+ std::shared_ptr<std::atomic<bool>> settled; // exactly-once claim
753
+ };
754
+ static std::mutex gBgEvalMutex;
755
+ static std::unordered_map<int64_t, PendingBgEval> gPendingBgEvals;
756
+ static int64_t gNextBgEvalId = 0;
757
+
758
+ // Settle a pending bg eval exactly once: invoke the Java callback with `error`
759
+ // (empty => success) and release the global ref. The shared `settled` flag is
760
+ // the single source of truth for the one-shot — the registry entry may already
761
+ // be gone (claimed/erased by the other party), so this is self-contained.
762
+ static void settleBgEval(jobject globalCallback,
763
+ const std::shared_ptr<std::atomic<bool>> &settled,
764
+ const std::string &error) {
765
+ if (!settled) {
766
+ return;
767
+ }
768
+ bool expected = false;
769
+ if (!settled->compare_exchange_strong(expected, true)) {
770
+ // Already settled by the other party (lambda vs drain). Do nothing —
771
+ // the winner already invoked + deleted the global ref.
772
+ return;
773
+ }
774
+ invokeBgSegmentCallback(globalCallback, error);
775
+ JNIEnv *env = getJNIEnv();
776
+ if (env && globalCallback) {
777
+ env->DeleteGlobalRef(globalCallback);
778
+ }
779
+ }
780
+
781
+ // Settle ALL currently-registered bg evals with a retryable NO_RUNTIME-class
782
+ // failure and clear the registry. Used when the bg runtime is going away (or is
783
+ // unreachable) and any enqueued-but-unrun eval would otherwise leak its global
784
+ // ref and hang the JS promise until the Kotlin watchdog. Each settle is
785
+ // exactly-once (the work lambda may race us, but the shared flag arbitrates),
786
+ // so this never double-invokes.
787
+ static void drainPendingBgEvals(const std::string &reason) {
788
+ // Move the entries OUT under the lock and clear the registry, then settle
789
+ // OUTSIDE the lock. settleBgEval performs a Java upcall (onComplete via
790
+ // CallVoidMethod), so holding gBgEvalMutex across it would hold a native
791
+ // lock across arbitrary JS — a re-entrancy / deadlock hazard if a callback
792
+ // ever synchronously re-enters a gBgEvalMutex-taking path. PendingBgEval is
793
+ // copyable (jobject handle + shared_ptr); copies share the same global ref
794
+ // and `settled` flag, and the shared flag still arbitrates exactly-once
795
+ // against a racing work lambda, so the global ref is released exactly once.
796
+ std::vector<PendingBgEval> drained;
797
+ {
798
+ std::lock_guard<std::mutex> lock(gBgEvalMutex);
799
+ if (gPendingBgEvals.empty()) {
800
+ return;
801
+ }
802
+ LOGE("[SplitBundle] draining %zu pending bg eval(s): %s",
803
+ gPendingBgEvals.size(), reason.c_str());
804
+ drained.reserve(gPendingBgEvals.size());
805
+ for (auto &entry : gPendingBgEvals) {
806
+ drained.push_back(entry.second);
807
+ }
808
+ gPendingBgEvals.clear();
809
+ }
810
+ for (auto &entry : drained) {
811
+ settleBgEval(entry.globalCallback, entry.settled, "NO_RUNTIME:" + reason);
812
+ }
813
+ }
814
+
815
// Reads the whole file at `path` into `out`.
//
// @param path File to read (opened in binary mode).
// @param out  Receives the file contents on success. Left untouched on any
//             failure, so a caller never sees a partially filled buffer.
// @return true when the complete file was read (an empty file counts as
//         success with an empty `out`); false on open/seek/tell/read failure
//         or when the buffer cannot be allocated.
//
// Never throws: the size comes from ftell(), which can report an absurd value
// for non-regular files, so the allocation is guarded instead of letting
// std::length_error / std::bad_alloc escape into the JNI caller.
static bool readBgFileToString(const std::string &path, std::string &out) {
  FILE *f = std::fopen(path.c_str(), "rb");
  if (f == nullptr) {
    return false;
  }

  std::string buffer;
  bool ok = false;
  long size = -1;
  if (std::fseek(f, 0, SEEK_END) == 0 && (size = std::ftell(f)) >= 0 &&
      std::fseek(f, 0, SEEK_SET) == 0) {
    const size_t wanted = static_cast<size_t>(size);
    try {
      buffer.resize(wanted);
      const size_t readBytes =
          (wanted == 0) ? 0 : std::fread(&buffer[0], 1, wanted, f);
      // A short read (truncated file, I/O error) is a failure.
      ok = (readBytes == wanted);
    } catch (const std::exception &) {
      ok = false;
    }
  }
  std::fclose(f);

  if (ok) {
    // Commit only a complete read.
    out.swap(buffer);
  }
  return ok;
}
840
+
841
+ // nativeEvaluateSegmentInBackground: schedule eval of the segment at `path`
842
+ // onto the BACKGROUND JS thread via the bg RuntimeExecutor and invoke `callback`
843
+ // from INSIDE that same block, strictly AFTER eval. Returns immediately; the
844
+ // callback fires later on the bg JS thread (or synchronously here on a fail-fast
845
+ // path such as bg runtime not ready / file read failure).
846
+ extern "C" JNIEXPORT void JNICALL
847
+ Java_com_backgroundthread_BackgroundThreadManager_nativeEvaluateSegmentInBackground(
848
+ JNIEnv *env, jobject /* thiz */, jstring segmentPath, jstring sourceURL,
849
+ jobject callback) {
850
+
851
+ // Global-ref the callback: it is invoked later on a different thread.
852
+ jobject globalCallback = env->NewGlobalRef(callback);
853
+ // Shared one-shot claim flag for this eval — used by BOTH the work lambda
854
+ // (after eval) and any drop-path drain, so exactly one of them invokes the
855
+ // callback + deletes the global ref.
856
+ auto settled = std::make_shared<std::atomic<bool>>(false);
857
+
858
+ const char *pathChars = segmentPath ? env->GetStringUTFChars(segmentPath, nullptr) : nullptr;
859
+ std::string path = pathChars ? std::string(pathChars) : std::string();
860
+ if (pathChars) env->ReleaseStringUTFChars(segmentPath, pathChars);
861
+
862
+ const char *urlChars = sourceURL ? env->GetStringUTFChars(sourceURL, nullptr) : nullptr;
863
+ std::string url = urlChars ? std::string(urlChars) : std::string("segment");
864
+ if (urlChars) env->ReleaseStringUTFChars(sourceURL, urlChars);
865
+
866
+ // Fail-fast on THIS thread: settle exactly once, no registry entry created.
867
+ auto finishOnThisThread = [&](const std::string &err) {
868
+ settleBgEval(globalCallback, settled, err);
869
+ };
870
+
871
+ if (path.empty()) {
872
+ finishOnThisThread("IO_ERROR:Empty segment path");
873
+ return;
874
+ }
875
+
876
+ // Snapshot the bg RuntimeExecutor under the timer mutex (the same lock that
877
+ // guards its assignment/teardown). If it's null the bg runtime hasn't
878
+ // installed its SharedBridge yet → retryable NO_RUNTIME.
879
+ RPCRuntimeExecutor executor;
880
+ {
881
+ std::lock_guard<std::mutex> lock(gTimerMutex);
882
+ executor = gBgTimerExecutor;
883
+ }
884
+ if (!executor) {
885
+ finishOnThisThread("NO_RUNTIME:Background runtime executor not available");
886
+ return;
887
+ }
888
+
889
+ // F: read the segment file HERE, on the calling (native module) thread,
890
+ // BEFORE dispatch — so only evaluateJavaScript + completion run on the bg JS
891
+ // thread and the read does not block it or race the watchdog.
892
+ std::string source;
893
+ if (!readBgFileToString(path, source)) {
894
+ finishOnThisThread("IO_ERROR:Failed to read bg segment file: " + path);
895
+ return;
896
+ }
897
+ if (source.empty()) {
898
+ finishOnThisThread("IO_ERROR:Empty bg segment file: " + path);
899
+ return;
900
+ }
901
+
902
+ // Register this in-flight eval BEFORE dispatch so that if the enqueued work
903
+ // never runs (schedule fails, context==null, ptr==0, or nativeDestroy drops
904
+ // pending work) the drain can settle it as a retryable NO_RUNTIME failure
905
+ // and release the global ref — instead of leaking it and leaning on the
906
+ // Kotlin 30s watchdog. The work lambda removes its own entry when it runs.
907
+ int64_t bgEvalId;
908
+ {
909
+ std::lock_guard<std::mutex> lock(gBgEvalMutex);
910
+ bgEvalId = gNextBgEvalId++;
911
+ gPendingBgEvals[bgEvalId] = PendingBgEval{globalCallback, settled};
912
+ }
913
+
914
+ // Move the already-read buffer + the global callback ref into the work
915
+ // lambda. The lambda runs on the bg JS thread (via nativeExecuteWork), which
916
+ // also drainMicrotasks() after — preserving eval+resolve as one atomic turn.
917
+ executor([globalCallback, settled, bgEvalId, source = std::move(source),
918
+ url = std::move(url)](jsi::Runtime &rt) {
919
+ // We are running now → claim ownership and remove our registry entry so
920
+ // a concurrent nativeDestroy drain can't also touch this eval. If the
921
+ // entry is ALREADY gone, a drop/destroy drain (drainPendingBgEvals)
922
+ // already settled this eval as retryable NO_RUNTIME and JS will retry —
923
+ // so we must NOT evaluate the segment again. This is reachable because a
924
+ // ptr==0 reload keeps this lambda in the coalesced queue (it only
925
+ // disarms the drain latch, preserving queue.items) and the recovered
926
+ // runtime's install-recover replays it; without this guard the segment
927
+ // would be evaluated twice on the recovered runtime.
928
+ {
929
+ std::lock_guard<std::mutex> lock(gBgEvalMutex);
930
+ if (gPendingBgEvals.erase(bgEvalId) == 0) {
931
+ return;
932
+ }
933
+ }
934
+ std::string error;
935
+ try {
936
+ LOGI("[SplitBundle] bg evaluating segment %s (%zu bytes)", url.c_str(),
937
+ source.size());
938
+ auto buffer =
939
+ std::make_shared<jsi::StringBuffer>(std::move(source));
940
+ // Runs the segment's top-level __d(...) synchronously on the bg JS
941
+ // thread before returning.
942
+ rt.evaluateJavaScript(std::move(buffer), url);
943
+ LOGI("[SplitBundle] bg segment %s evaluated", url.c_str());
944
+ } catch (const jsi::JSError &e) {
945
+ error = std::string("Bg segment eval JSError for ") + url + ": " +
946
+ e.getMessage();
947
+ LOGE("[SplitBundle] %s", error.c_str());
948
+ } catch (const std::exception &e) {
949
+ error = std::string("Bg segment eval failed for ") + url + ": " +
950
+ e.what();
951
+ LOGE("[SplitBundle] %s", error.c_str());
952
+ } catch (...) {
953
+ error = std::string("Bg segment eval failed for ") + url +
954
+ " (unknown C++ exception)";
955
+ LOGE("[SplitBundle] %s", error.c_str());
956
+ }
957
+ // Resolve/reject from INSIDE this same bg-JS-thread block, strictly
958
+ // AFTER eval above — the ordering guarantee that fixes the race. The
959
+ // shared `settled` flag makes this a no-op if a drain already claimed
960
+ // it (it would not have, since we erased our entry above, but the flag
961
+ // keeps the invariant airtight against any reorder).
962
+ settleBgEval(globalCallback, settled, error);
963
+ });
964
+ }
965
+
966
+ // nativeDropScheduledWork: clean up after a scheduleOnJSThread drop path where
967
+ // CallVoidMethod itself SUCCEEDED but Kotlin then found the runtime unreachable
968
+ // (context==null / ptr==0) and returned WITHOUT calling nativeExecuteWork for
969
+ // this `workId`. Several things must be released for the given runtime:
970
+ // 1. gPendingWork[workId] — the stored work lambda (holding the segment
971
+ // SOURCE BUFFER). nativeExecuteWork is the only other eraser and it will
972
+ // never run for this id, so without this it leaks until nativeDestroy.
973
+ // 2. The coalesced RuntimeWorkQueue's drain latch for this runtime — under
974
+ // the coalesced model a successful post (scheduled==true) that never
975
+ // reaches the JS thread leaves the queue stranded with drainScheduled==
976
+ // true, so a recovered runtime would never re-arm a drain. This is a
977
+ // TRANSIENT condition: ptr==0 happens during reload, and the SAME runtime
978
+ // recovers with a fresh ptr. We therefore must NOT leak+clear queue.items
979
+ // — the main runtime has no JS-side retry net, so abandoning its queued
980
+ // SharedRPC deliveries (notifyOtherRuntime) loses them forever. Instead we
981
+ // only reset the drain latch (drainScheduled / scheduledDrainWorkId) so the
982
+ // next enqueue re-arms a fresh drain, leaving queue.items intact for the
983
+ // recovered runtime to drain. Applies to BOTH runtimes (isMain selects).
984
+ // 3. (bg only) The in-flight bg eval(s) — settle as retryable NO_RUNTIME so
985
+ // the JNI global ref is released and the JS promise resolves now instead of
986
+ // hanging on the 30s watchdog. drain-all is sound: an unreachable bg JS
987
+ // thread dooms every enqueued bg eval equally.
988
+ // Exactly-once via the shared `settled` flag plus the work lambda's registry check:
989
+ // a stale bg-eval lambda that a recovered runtime replays from queue.items finds its entry gone and returns without evaluating.
990
+ extern "C" JNIEXPORT void JNICALL
991
+ Java_com_backgroundthread_BackgroundThreadManager_nativeDropScheduledWork(
992
+ JNIEnv * /* env */, jobject /* thiz */, jboolean isMain, jlong workId) {
993
+ {
994
+ std::lock_guard<std::mutex> lock(gWorkMutex);
995
+ gPendingWork.erase(static_cast<int64_t>(workId));
996
+ // TRANSIENT ptr==0 (reload in flight): do NOT abandon queue.items — the
997
+ // recovered runtime still needs them (main has no JS retry net). Only
998
+ // disarm the drain latch so the next enqueue re-arms a fresh drain.
999
+ auto &queue = getRuntimeWorkQueue(static_cast<bool>(isMain));
1000
+ queue.drainScheduled = false;
1001
+ queue.scheduledDrainWorkId = -1;
1002
+ }
1003
+ if (!isMain) {
1004
+ drainPendingBgEvals("Background runtime unreachable when scheduling segment eval");
1005
+ }
1006
+ }
1007
+
610
1008
  // ── nativeInstallSharedBridge ───────────────────────────────────────────
611
1009
  // Install SharedStore and SharedRPC into a runtime.
612
1010
  extern "C" JNIEXPORT void JNICALL
@@ -630,6 +1028,11 @@ Java_com_backgroundthread_BackgroundThreadManager_nativeInstallSharedBridge(
630
1028
  bool capturedIsMain = static_cast<bool>(isMain);
631
1029
 
632
1030
  RPCRuntimeExecutor executor = [ref, capturedIsMain](std::function<void(jsi::Runtime &)> work) {
1031
+ // Coalesce per-runtime work into a single batched drain (see
1032
+ // enqueueRuntimeWork) instead of one Kotlin scheduleOnJSThread hop per
1033
+ // item. The bg-eval failure-drain that previously lived inline here now
1034
+ // runs in scheduleRuntimeDrain's schedule-failure path — under the
1035
+ // coalesced model that is the single place a schedule can fail.
633
1036
  enqueueRuntimeWork(ref, capturedIsMain, std::move(work));
634
1037
  };
635
1038
 
@@ -638,6 +1041,12 @@ Java_com_backgroundthread_BackgroundThreadManager_nativeInstallSharedBridge(
638
1041
  // back to the bg JS queue. We must do this BEFORE moving `executor` into
639
1042
  // SharedRPC::install (which will std::move it out).
640
1043
  if (!capturedIsMain) {
1044
+ // gBgTimerExecutor is read/cleared under gTimerMutex (timer worker
1045
+ // snapshot ~L857, nativeDestroy ~L1133); this write must take the same
1046
+ // lock or it races those readers (UB on std::function). nativeInstall
1047
+ // SharedBridge holds no other lock here, so a narrow guard scoped to
1048
+ // just the assignment is correct and cannot double-lock.
1049
+ std::lock_guard<std::mutex> lock(gTimerMutex);
641
1050
  gBgTimerExecutor = executor;
642
1051
  }
643
1052
  SharedRPC::install(*rt, std::move(executor), runtimeId);
@@ -650,6 +1059,37 @@ Java_com_backgroundthread_BackgroundThreadManager_nativeInstallSharedBridge(
650
1059
  installTimersOnRuntime(*rt);
651
1060
  invokeOptionalGlobalFunction(*rt, "__setupBackgroundRPCHandler");
652
1061
  }
1062
+
1063
+ // Recover items stranded by a ptr==0 drop during reload. nativeDropScheduled
1064
+ // Work deliberately KEEPS queue.items (main has no JS retry net) and only
1065
+ // disarms the drain latch, relying on a LATER enqueue to re-arm a drain. But
1066
+ // if no further enqueue arrives after this runtime recovers, those carried-
1067
+ // over items (e.g. main-runtime notifyOtherRuntime deliveries) would sit
1068
+ // until nativeDestroy and be lost. Make recovery structural.
1069
+ //
1070
+ // Force a fresh drain on the freshly installed runtime whenever the queue is
1071
+ // non-empty, REGARDLESS of the drainScheduled latch: a drain posted before
1072
+ // reload may have been queued on the now-dead pre-reload JS thread and
1073
+ // silently discarded (its runnable never runs, so its nativeDropScheduledWork
1074
+ // never fires and the latch is stuck drainScheduled==true). Gating recovery on
1075
+ // !drainScheduled would then skip it and strand the items forever. Re-arming
1076
+ // here on this runtime's executor (`ref`) guarantees they drain; if a stale
1077
+ // drain is in fact still live, it self-cancels via the empty-queue guard in
1078
+ // scheduleRuntimeDrain (at worst one benign extra drain hop). Mirror
1079
+ // enqueueRuntimeWork's lock discipline: set the latch under gWorkMutex, call
1080
+ // scheduleRuntimeDrain OUTSIDE the lock. Applies to BOTH runtimes.
1081
+ bool shouldRecoverDrain = false;
1082
+ {
1083
+ std::lock_guard<std::mutex> lock(gWorkMutex);
1084
+ auto &queue = getRuntimeWorkQueue(capturedIsMain);
1085
+ if (!queue.items.empty()) {
1086
+ queue.drainScheduled = true;
1087
+ shouldRecoverDrain = true;
1088
+ }
1089
+ }
1090
+ if (shouldRecoverDrain) {
1091
+ scheduleRuntimeDrain(ref, capturedIsMain);
1092
+ }
653
1093
  }
654
1094
 
655
1095
  // ── nativeSetupErrorHandler ─────────────────────────────────────────────
@@ -734,6 +1174,26 @@ Java_com_backgroundthread_BackgroundThreadManager_nativeInvalidateSharedRpc(
734
1174
  env->ReleaseStringUTFChars(runtimeId, idChars);
735
1175
 
736
1176
  bool found = SharedRPC::invalidate(id);
1177
+
1178
+ // The runtime named by `id` is being torn down (restart → host.reload).
1179
+ // Its coalesced work queue must be fully quiesced: if a drain was posted
1180
+ // (drainScheduled==true) but is dropped during reload, the latch would
1181
+ // survive the reinstall and every future enqueue would see a stale
1182
+ // drainScheduled==true → shouldSchedule stays false and new work enqueues
1183
+ // but never schedules a drain again (work stranded forever). Leak+clear the
1184
+ // queue (it's being destroyed anyway — the queued ~jsi::Function must not
1185
+ // run on the dying runtime), reset the drain latch, and erase the orphaned
1186
+ // gPendingWork entry for the outstanding drain. runtimeId "main" → isMain.
1187
+ {
1188
+ std::lock_guard<std::mutex> lock(gWorkMutex);
1189
+ bool isMain = (id == "main");
1190
+ auto &queue = getRuntimeWorkQueue(isMain);
1191
+ if (queue.scheduledDrainWorkId >= 0) {
1192
+ gPendingWork.erase(queue.scheduledDrainWorkId);
1193
+ }
1194
+ leakAndClearRuntimeQueue(queue);
1195
+ }
1196
+
737
1197
  LOGI("nativeInvalidateSharedRpc: id=%s found=%d", id.c_str(), found ? 1 : 0);
738
1198
  return found ? JNI_TRUE : JNI_FALSE;
739
1199
  }
@@ -776,6 +1236,11 @@ Java_com_backgroundthread_BackgroundThreadManager_nativeDestroy(
776
1236
  // Drain pending cross-runtime work. Each std::function may capture a
777
1237
  // shared_ptr<jsi::Function> tied to the destroyed runtime; leak them
778
1238
  // for the same reason as above.
1239
+ //
1240
+ // NOTE: the bg-eval work lambdas captured here also hold a JNI global ref
1241
+ // to a SegmentEvalCallback. Leaking the std::function leaks that ref AND
1242
+ // leaves the JS promise pending — so we settle those explicitly below via
1243
+ // gPendingBgEvals (the entry survives independently of the leaked lambda).
779
1244
  {
780
1245
  std::lock_guard<std::mutex> lock(gWorkMutex);
781
1246
  for (auto &entry : gPendingWork) {
@@ -788,8 +1253,15 @@ Java_com_backgroundthread_BackgroundThreadManager_nativeDestroy(
788
1253
  }
789
1254
  queue->items.clear();
790
1255
  queue->drainScheduled = false;
1256
+ queue->scheduledDrainWorkId = -1;
791
1257
  }
792
1258
  }
793
1259
 
1260
+ // Drain pending bg-eval callbacks: the bg runtime is gone, so any eval that
1261
+ // was enqueued but never ran must be settled NOW (retryable NO_RUNTIME) so
1262
+ // the JS promise resolves immediately and the global ref is released —
1263
+ // rather than leaking and relying on the Kotlin 30s watchdog.
1264
+ drainPendingBgEvals("Background runtime destroyed before segment eval ran");
1265
+
794
1266
  LOGI("Native resources cleaned up");
795
1267
  }
@@ -24,6 +24,7 @@ import com.facebook.react.shell.MainReactPackage
24
24
  import java.io.File
25
25
  import java.lang.ref.WeakReference
26
26
  import java.util.concurrent.TimeUnit
27
+ import java.util.concurrent.atomic.AtomicBoolean
27
28
 
28
29
  /**
29
30
  * Singleton manager for the background React Native runtime.
@@ -63,6 +64,12 @@ class BackgroundThreadManager private constructor() {
63
64
  companion object {
64
65
  private const val MODULE_NAME = "background"
65
66
 
67
+ // Bounded watchdog: if the bg JS thread never drains to our scheduled
68
+ // eval (e.g. the bg entry bundle never finished evaluating), reject as a
69
+ // RETRYABLE timeout rather than leaving the JS promise pending forever.
70
+ // Matches the main-runtime SplitBundleLoader watchdog.
71
+ private const val BG_SEGMENT_EVAL_TIMEOUT_MS = 30_000L
72
+
66
73
  init {
67
74
  System.loadLibrary("background_thread")
68
75
  }
@@ -78,6 +85,20 @@ class BackgroundThreadManager private constructor() {
78
85
  }
79
86
  }
80
87
 
88
+ /**
89
+ * Completion contract invoked by the native (JNI) side AFTER the bg segment
90
+ * has been evaluated into the BACKGROUND runtime. Called from the bg JS
91
+ * thread (or synchronously on the caller thread for fail-fast paths).
92
+ *
93
+ * @param error null on success; a non-empty message on failure. A message
94
+ * prefixed with "NO_RUNTIME:" means the bg runtime is not ready yet
95
+ * (retryable); "IO_ERROR:" means the segment file read failed (fatal);
96
+ * any other message is a segment JS/Hermes eval throw (fatal).
97
+ */
98
+ fun interface SegmentEvalCallback {
99
+ fun onComplete(error: String?)
100
+ }
101
+
81
102
  // ── JNI declarations ────────────────────────────────────────────────────
82
103
 
83
104
  private external fun nativeInstallSharedBridge(runtimePtr: Long, isMain: Boolean)
@@ -85,6 +106,41 @@ class BackgroundThreadManager private constructor() {
85
106
  private external fun nativeDestroy()
86
107
  private external fun nativeExecuteWork(runtimePtr: Long, workId: Long)
87
108
 
109
+ /**
110
+ * Evaluate the segment at [segmentPath] into the BACKGROUND runtime on its
111
+ * JS thread and invoke [callback] from inside that same JS-thread block,
112
+ * strictly AFTER the segment's `__d(...)` module definitions have run. This
113
+ * is the ordering guarantee that fixes the bg "Requiring unknown module"
114
+ * race (the bg analogue of the main-runtime SplitBundleLoaderJSI fix).
115
+ *
116
+ * Returns immediately; [callback] fires later on the bg JS thread (or
117
+ * synchronously on the calling thread for fail-fast paths such as the bg
118
+ * runtime not being ready or the segment file failing to read).
119
+ */
120
+ private external fun nativeEvaluateSegmentInBackground(
121
+ segmentPath: String,
122
+ sourceURL: String,
123
+ callback: SegmentEvalCallback
124
+ )
125
+
126
+ /**
127
+ * Clean up after a SUCCESSFUL scheduleOnJSThread post that nonetheless can
128
+ * never run: called from [scheduleOnJSThread]'s ptr == 0 branch (we are
129
+ * already on the stale/torn-down JS thread, so C++ saw scheduled == true and
130
+ * will not clean up on its own). For the given runtime ([isMain]) the native
131
+ * side erases gPendingWork[workId] (frees the captured segment source
132
+ * buffer) and resets drainScheduled so a recovered runtime re-arms a fresh
133
+ * drain on the next enqueue. This is a TRANSIENT reload condition, so the
134
+ * native side deliberately leaves the coalesced RuntimeWorkQueue's items
135
+ * INTACT — the same runtime recovers and the main runtime has no JS-side
136
+ * retry net, so abandoning its queued SharedRPC deliveries would lose them.
137
+ * For the bg runtime only, it also settles every in-flight bg
138
+ * segment eval as a retryable NO_RUNTIME failure so each JNI global ref is
139
+ * released and the JS promise resolves immediately instead of leaking until
140
+ * teardown or the bg watchdog. Exactly-once on the native side.
141
+ */
142
+ private external fun nativeDropScheduledWork(isMain: Boolean, workId: Long)
143
+
88
144
  /**
89
145
  * Synchronously mark the SharedRPC listener for `runtimeId` as dead
90
146
  * before the underlying JS runtime is torn down. See
@@ -412,6 +468,13 @@ class BackgroundThreadManager private constructor() {
412
468
  BTLogger.info("scheduleOnJSThread: isMain=$isMain, workId=$workId, context=${context != null}")
413
469
  if (context == null) {
414
470
  BTLogger.error("scheduleOnJSThread: context is null! isMain=$isMain, mainCtx=${mainReactContext != null}, bgHost=${bgReactHost != null}, bgCtx=${bgReactHost?.currentReactContext != null}")
471
+ // The just-enqueued native work will never reach the JS thread.
472
+ // Return false so the C++ coalesced scheduler
473
+ // (callScheduleOnJSThread → scheduleRuntimeDrain's !scheduled branch)
474
+ // does the FULL cleanup itself: erase gPendingWork[workId], leak+clear
475
+ // the coalesced RuntimeWorkQueue, reset drainScheduled, and (bg only)
476
+ // settle any in-flight bg eval as retryable NO_RUNTIME. No
477
+ // nativeDropScheduledWork call is needed here.
415
478
  return false
416
479
  }
417
480
  return try {
@@ -428,6 +491,17 @@ class BackgroundThreadManager private constructor() {
428
491
  }
429
492
  } else {
430
493
  BTLogger.error("scheduleOnJSThread: ptr is 0! isMain=$isMain")
494
+ // We are already on the (stale/torn-down) JS thread after a
495
+ // SUCCESSFUL post (C++ saw scheduled==true), so the work won't
496
+ // run and C++ won't clean up on its own. Drop it for THIS
497
+ // runtime (main or bg): erase gPendingWork[workId] (frees the
498
+ // source buffer) and reset drainScheduled so a recovered
499
+ // runtime re-arms a fresh drain. The coalesced RuntimeWork
500
+ // Queue items are left intact (transient reload; the same
501
+ // runtime recovers and main has no JS retry net).
502
+ // drainPendingBgEvals inside the native fn is
503
+ // gated to !isMain, so settling bg evals only happens for bg.
504
+ nativeDropScheduledWork(isMain, workId)
431
505
  }
432
506
  }
433
507
  if (!posted) {
@@ -443,51 +517,98 @@ class BackgroundThreadManager private constructor() {
443
517
  // ── Segment Registration (Phase 2.5 spike) ─────────────────────────────
444
518
 
445
519
  /**
446
- * Register a HBC segment in the background runtime.
447
- * Uses CatalystInstance.registerSegment() on the background ReactContext.
520
+ * Evaluate a HBC segment into the background runtime with completion
521
+ * callback (fix A: eval-then-resolve).
448
522
  *
449
- * @param segmentId The segment ID to register
450
- * @param path Absolute file path to the .seg.hbc file
451
- * @throws IllegalStateException if background runtime is not started
452
- * @throws IllegalArgumentException if segment file does not exist
453
- */
454
- /**
455
- * Register a HBC segment in the background runtime with completion callback.
456
- * Dispatches to the background JS queue thread and invokes the callback
457
- * only after registerSegment has actually executed.
523
+ * Previously this called `ReactContext.registerSegment(...)`, whose
524
+ * completion fires BEFORE the segment bytecode is evaluated into the runtime
525
+ * (registerSegment only ENQUEUES the eval). That races Metro's
526
+ * `import().then(() => __r(moduleId))` and produces a fatal, uncatchable
527
+ * "Requiring unknown module" — locale segments load through this bg path, so
528
+ * a language switch could still crash. We now evaluate the segment OURSELVES
529
+ * on the bg JS thread via [nativeEvaluateSegmentInBackground] and resolve
530
+ * ONLY after eval completes, so eval + resolve are one atomic JS-thread turn.
458
531
  *
459
- * @param segmentId The segment ID to register
532
+ * @param segmentId The segment ID (used only for the synthetic source URL)
460
533
  * @param path Absolute file path to the .seg.hbc file
461
- * @param onComplete Called with null on success, or an Exception on failure
534
+ * @param onComplete Called with (code=null) on success, or
535
+ * (code=<contract reject code>, message) on failure. The code is one of
536
+ * the SHARED split-bundle contract codes so the JS loader's retryable set
537
+ * { SPLIT_BUNDLE_NO_RUNTIME, SPLIT_BUNDLE_TIMEOUT } classifies correctly.
462
538
  */
463
- fun registerSegmentInBackground(segmentId: Int, path: String, onComplete: (Exception?) -> Unit) {
539
+ fun registerSegmentInBackground(
540
+ segmentId: Int,
541
+ path: String,
542
+ onComplete: (code: String?, message: String?) -> Unit
543
+ ) {
464
544
  if (!isStarted) {
465
- onComplete(IllegalStateException("Background runtime not started"))
545
+ // Bg runtime not started yet → retryable (the loader will re-attempt
546
+ // once the bg host is up).
547
+ onComplete("SPLIT_BUNDLE_NO_RUNTIME", "Background runtime not started")
466
548
  return
467
549
  }
468
550
 
469
551
  val file = File(path)
470
552
  if (!file.exists()) {
471
- onComplete(IllegalArgumentException("Segment file not found: $path"))
553
+ onComplete("SPLIT_BUNDLE_NOT_FOUND", "Segment file not found: $path")
472
554
  return
473
555
  }
474
556
 
475
557
  val context = bgReactHost?.currentReactContext
476
558
  if (context == null) {
477
- onComplete(IllegalStateException("Background ReactContext not available"))
559
+ onComplete("SPLIT_BUNDLE_NO_RUNTIME", "Background ReactContext not available")
478
560
  return
479
561
  }
480
562
 
481
- // Use ReactContext.registerSegment which works in both bridge
482
- // and bridgeless modes.
563
+ // One-shot guard: native success/error AND the watchdog can each try to
564
+ // settle; only the first wins.
565
+ val settled = AtomicBoolean(false)
566
+ val sourceURL = "seg-$segmentId.js"
567
+ val segStart = System.nanoTime()
568
+
569
+ // Bounded watchdog: if the bg JS thread never drains to our eval, reject
570
+ // with the RETRYABLE SPLIT_BUNDLE_TIMEOUT instead of hanging forever.
571
+ val watchdog = Handler(Looper.getMainLooper())
572
+ val timeoutRunnable = Runnable {
573
+ if (settled.compareAndSet(false, true)) {
574
+ BTLogger.error("[SplitBundle] bg segment id=$segmentId eval timed out after ${BG_SEGMENT_EVAL_TIMEOUT_MS}ms (bg entry bundle likely never finished evaluating); rejecting as retryable timeout")
575
+ onComplete("SPLIT_BUNDLE_TIMEOUT", "Bg segment eval timed out: id=$segmentId")
576
+ }
577
+ }
578
+ watchdog.postDelayed(timeoutRunnable, BG_SEGMENT_EVAL_TIMEOUT_MS)
579
+
483
580
  try {
484
- context.registerSegment(segmentId, path) {
485
- BTLogger.info("Segment registered in background runtime: id=$segmentId, path=$path")
486
- onComplete(null)
581
+ nativeEvaluateSegmentInBackground(path, sourceURL) { error ->
582
+ if (settled.compareAndSet(false, true)) {
583
+ watchdog.removeCallbacks(timeoutRunnable)
584
+ if (error == null) {
585
+ val segMs = (System.nanoTime() - segStart) / 1_000_000.0
586
+ BTLogger.info("[SplitBundle] bg segment id=$segmentId evaluated in ${String.format("%.1f", segMs)}ms (eval-complete)")
587
+ onComplete(null, null)
588
+ } else {
589
+ // Native prefixes its failures so we can map to the
590
+ // shared contract codes: NO_RUNTIME (retryable),
591
+ // IO_ERROR (fatal), else eval throw (fatal).
592
+ when {
593
+ error.startsWith("NO_RUNTIME:") ->
594
+ onComplete("SPLIT_BUNDLE_NO_RUNTIME", error.removePrefix("NO_RUNTIME:"))
595
+ error.startsWith("IO_ERROR:") ->
596
+ onComplete("SPLIT_BUNDLE_IO_ERROR", error.removePrefix("IO_ERROR:"))
597
+ else ->
598
+ onComplete("SPLIT_BUNDLE_EVAL_ERROR", error)
599
+ }
600
+ }
601
+ }
602
+ }
603
+ } catch (e: Throwable) {
604
+ // nativeEvaluateSegmentInBackground itself failed to dispatch (e.g.
605
+ // UnsatisfiedLinkError). Fail closed; do NOT fall back to the
606
+ // race-prone registerSegment path.
607
+ if (settled.compareAndSet(false, true)) {
608
+ watchdog.removeCallbacks(timeoutRunnable)
609
+ BTLogger.error("[SplitBundle] FATAL: nativeEvaluateSegmentInBackground threw for id=$segmentId: ${e.message}")
610
+ onComplete("SPLIT_BUNDLE_NATIVE_UNAVAILABLE", "Bg native segment eval unavailable: ${e.message}")
487
611
  }
488
- } catch (e: Exception) {
489
- BTLogger.error("Failed to register segment in background runtime: ${e.message}")
490
- onComplete(e)
491
612
  }
492
613
  }
493
614
 
@@ -30,9 +30,17 @@ class BackgroundThreadModule(reactContext: ReactApplicationContext) :
30
30
 
31
31
  override fun loadSegmentInBackground(segmentId: Double, path: String, promise: Promise) {
32
32
  BackgroundThreadManager.getInstance()
33
- .registerSegmentInBackground(segmentId.toInt(), path) { error ->
34
- if (error != null) {
35
- promise.reject("BG_SEGMENT_LOAD_ERROR", error.message, error)
33
+ .registerSegmentInBackground(segmentId.toInt(), path) { code, message ->
34
+ if (code != null) {
35
+ // Reject with the SHARED split-bundle contract code (e.g.
36
+ // SPLIT_BUNDLE_NO_RUNTIME / SPLIT_BUNDLE_TIMEOUT are
37
+ // retryable; SPLIT_BUNDLE_IO_ERROR / SPLIT_BUNDLE_EVAL_ERROR
38
+ // / SPLIT_BUNDLE_NATIVE_UNAVAILABLE / SPLIT_BUNDLE_NOT_FOUND
39
+ // are not) so the JS loader classifies retryability the same
40
+ // way it does for the main path. registerSegmentInBackground
41
+ // always supplies one of these contract codes here, so there
42
+ // is no legacy/opaque default string to fall back to.
43
+ promise.reject(code, message)
36
44
  } else {
37
45
  promise.resolve(null)
38
46
  }
@@ -38,7 +38,54 @@
38
38
  path:path
39
39
  completion:^(NSError * _Nullable error) {
40
40
  if (error) {
41
- reject(@"BG_SEGMENT_LOAD_ERROR", error.localizedDescription, error);
41
+ // Fix 1/E: map the manager's DISTINCT NSError.code to its own JS
42
+ // reject code (matching SplitBundleLoader's main-runtime mapping and
43
+ // the shared error-code contract) so JS can classify
44
+ // retryable-vs-fatal. EVERY EBgMgrSegmentEvalError case is mapped
45
+ // EXPLICITLY here — there is no silent `default → NO_RUNTIME` that
46
+ // could MISCLASSIFY a fatal failure (e.g. a missing segment file)
47
+ // as a transient runtime-not-ready that gets retried/masked. The old
48
+ // code both collapsed bg failures into one opaque
49
+ // `BG_SEGMENT_LOAD_ERROR` and (before that) let raw codes 1/2 fall
50
+ // through `default` to retryable NO_RUNTIME.
51
+ NSString *rejectCode;
52
+ switch ((EBgMgrSegmentEvalError)error.code) {
53
+ case EBgMgrSegmentEvalErrorNotStarted:
54
+ case EBgMgrSegmentEvalErrorNilInstance:
55
+ // Bg runtime not started / RCTInstance nil — TRANSIENT: the
56
+ // bg host simply wasn't ready yet; a later attempt may win.
57
+ rejectCode = @"SPLIT_BUNDLE_NO_RUNTIME"; // retryable
58
+ break;
59
+ case EBgMgrSegmentEvalErrorFileNotFound:
60
+ // Segment file missing — FATAL packaging/OTA corruption.
61
+ rejectCode = @"SPLIT_BUNDLE_NOT_FOUND"; // fatal
62
+ break;
63
+ case EBgMgrSegmentEvalErrorIvarMissing:
64
+ // `_rctInstance` ivar reflection failed — STRUCTURAL/PERMANENT
65
+ // (an RN bump renamed the private field). Retrying is futile.
66
+ rejectCode = @"SPLIT_BUNDLE_NATIVE_UNAVAILABLE"; // fatal
67
+ break;
68
+ case EBgMgrSegmentEvalErrorIORead:
69
+ rejectCode = @"SPLIT_BUNDLE_IO_ERROR"; // fatal
70
+ break;
71
+ case EBgMgrSegmentEvalErrorEvalThrow:
72
+ rejectCode = @"SPLIT_BUNDLE_EVAL_ERROR"; // fatal (segment bug)
73
+ break;
74
+ case EBgMgrSegmentEvalErrorTimeout:
75
+ rejectCode = @"SPLIT_BUNDLE_TIMEOUT"; // retryable
76
+ break;
77
+ default:
78
+ // Defensive LAST RESORT only: every real EBgMgrSegmentEvalError
79
+ // case is handled above, so reaching here means the manager
80
+ // emitted an unmapped code (a bug). Choose retryable
81
+ // NO_RUNTIME so a stale/unknown native build degrades safely
82
+ // rather than permanently poisoning a segment — but log it
83
+ // loudly so the unmapped code is caught and named.
84
+ [BTLogger warn:[NSString stringWithFormat:@"[SplitBundle] bg loadSegment: UNMAPPED EBgMgrSegmentEvalError code=%ld — defaulting to retryable NO_RUNTIME. This is a native bug: add an explicit case.", (long)error.code]];
85
+ rejectCode = @"SPLIT_BUNDLE_NO_RUNTIME"; // retryable (defensive)
86
+ break;
87
+ }
88
+ reject(rejectCode, error.localizedDescription, error);
42
89
  } else {
43
90
  resolve(nil);
44
91
  }
@@ -6,6 +6,49 @@ NS_ASSUME_NONNULL_BEGIN
6
6
  @class RCTReactNativeFactory;
7
7
  @class RCTHost;
8
8
 
9
+ /// NSError `code` values produced by `registerSegmentInBackground:` /
10
+ /// `evaluateSegmentInBackground:` under the `BackgroundThread` error domain.
11
+ ///
12
+ /// These are DISTINCT (kept numerically aligned with SplitBundleLoader's
13
+ /// `ESegmentEvalError`) so the TurboModule boundary
14
+ /// (`BackgroundThread.loadSegmentInBackground`) can map each to its OWN JS
15
+ /// reject code and JS can classify retryable-vs-fatal — rather than collapsing
16
+ /// every bg failure into one opaque code (fix E). EVERY failure the manager
17
+ /// produces MUST have a NAMED case here so the boundary's switch maps it
18
+ /// explicitly: a raw integer literal would fall through to the boundary's
19
+ /// defensive `default` (→ retryable NO_RUNTIME) and silently MISCLASSIFY a
20
+ /// fatal failure (e.g. a missing segment file) as a transient one that gets
21
+ /// retried/masked instead of surfaced. The mapping the boundary applies is:
22
+ /// - NotStarted (bg runtime not started yet) → `SPLIT_BUNDLE_NO_RUNTIME` (retryable)
23
+ /// - FileNotFound (segment file missing) → `SPLIT_BUNDLE_NOT_FOUND` (fatal)
24
+ /// - NilInstance (bg RCTInstance is nil) → `SPLIT_BUNDLE_NO_RUNTIME` (retryable)
25
+ /// - IORead (file read/mmap failed) → `SPLIT_BUNDLE_IO_ERROR` (fatal)
26
+ /// - EvalThrow (segment JS/Hermes bug) → `SPLIT_BUNDLE_EVAL_ERROR` (fatal)
27
+ /// - Timeout (buffered executor never ran)→ `SPLIT_BUNDLE_TIMEOUT` (retryable)
28
+ /// - IvarMissing (`_rctInstance` reflection
29
+ /// failed — STRUCTURAL/permanent)→ `SPLIT_BUNDLE_NATIVE_UNAVAILABLE` (fatal)
30
+ ///
31
+ /// WHY NilInstance and IvarMissing are split (and not both NO_RUNTIME): a nil
32
+ /// RCTInstance is TRANSIENT — the bg host just isn't up yet, a later attempt
33
+ /// can succeed. A missing `_rctInstance` ivar is STRUCTURAL/PERMANENT — an RN
34
+ /// version bump renamed/removed the private field our reflection depends on, so
35
+ /// bg segment loading is disabled until the native code is updated. Retrying
36
+ /// the latter is futile, so it maps to fatal NATIVE_UNAVAILABLE.
37
+ ///
38
+ /// Declared here (not file-local in the .mm) so the boundary references these
39
+ /// by NAME — a future renumbering stays exact instead of drifting against a
40
+ /// hardcoded magic-number switch. Values 3-6 are kept STABLE; 1/2/7 fill in the
41
+ /// previously-raw `registerSegmentInBackground:` codes and the structural split.
42
+ typedef NS_ENUM(NSInteger, EBgMgrSegmentEvalError) {
43
+ EBgMgrSegmentEvalErrorNotStarted = 1, // bg runtime not started yet (retryable)
44
+ EBgMgrSegmentEvalErrorFileNotFound = 2, // segment file missing (fatal)
45
+ EBgMgrSegmentEvalErrorNilInstance = 3, // bg RCTInstance is nil (retryable)
46
+ EBgMgrSegmentEvalErrorIORead = 4, // file read/mmap failed (fatal)
47
+ EBgMgrSegmentEvalErrorEvalThrow = 5, // segment JS/Hermes bug (fatal)
48
+ EBgMgrSegmentEvalErrorTimeout = 6, // buffered executor never ran (retryable)
49
+ EBgMgrSegmentEvalErrorIvarMissing = 7, // `_rctInstance` ivar reflection failed (fatal, structural)
50
+ };
51
+
9
52
  @interface BackgroundThreadManager : NSObject
10
53
 
11
54
  /// Shared instance for singleton pattern
@@ -20,6 +20,82 @@
20
20
  #import <React/RCTReloadCommand.h>
21
21
  #import <ReactCommon/RCTHost.h>
22
22
  #import <objc/runtime.h>
23
+ #import <os/lock.h>
24
+ #include <jsi/jsi.h>
25
+ #include <cstdint>
26
+
27
+ namespace {
28
+
29
+ // Zero-copy jsi::Buffer that retains its NSData for the async (buffered)
30
+ // executor block's lifetime — same rationale as SplitBundleLoader's
31
+ // NSDataJSIBuffer (M4/M5): no second full copy of the segment bytes, and the
32
+ // mmap'd/heap bytes stay alive because the buffer owns the NSData.
33
+ class BgMgrNSDataJSIBuffer : public facebook::jsi::Buffer {
34
+ public:
35
+ explicit BgMgrNSDataJSIBuffer(NSData *data) : data_(data) {}
36
+ size_t size() const override { return data_.length; }
37
+ const uint8_t *data() const override {
38
+ return static_cast<const uint8_t *>(data_.bytes);
39
+ }
40
+
41
+ private:
42
+ NSData *data_; // strong retain (ARC).
43
+ };
44
+
45
+ } // namespace
46
+
47
+ // Exactly-once settle guard for the bg watchdog — same design as
48
+ // SplitBundleLoader's SBLSettleGuard. An ARC object captured by both the
49
+ // executor block and the watchdog dispatch_after; ARC keeps its lock alive
50
+ // until both release, so neither block needs (or may do) a manual free —
51
+ // avoiding the use-after-free that would occur if either freed the lock while
52
+ // the other still runs.
53
+ @interface BgMgrSettleGuard : NSObject
54
+ - (BOOL)tryClaim;
55
+ @end
56
+
57
+ @implementation BgMgrSettleGuard {
58
+ os_unfair_lock _lock;
59
+ BOOL _settled;
60
+ }
61
+ - (instancetype)init {
62
+ if (self = [super init]) {
63
+ _lock = OS_UNFAIR_LOCK_INIT;
64
+ _settled = NO;
65
+ }
66
+ return self;
67
+ }
68
+ - (BOOL)tryClaim {
69
+ os_unfair_lock_lock(&_lock);
70
+ BOOL won = !_settled;
71
+ if (won) {
72
+ _settled = YES;
73
+ }
74
+ os_unfair_lock_unlock(&_lock);
75
+ return won;
76
+ }
77
+ @end
78
+
79
+ // Watchdog window for the bg buffered runtime executor (H2 = bg-runtime port of
80
+ // SplitBundleLoader's C1). The bg runtime's buffered executor stays buffered
81
+ // until the bg entry bundle finishes evaluating in
82
+ // BackgroundReactNativeDelegate.hostDidStart:; if that never completes (host
83
+ // teardown, OTA-resolve abort), the block never runs — without this watchdog
84
+ // the JS promise would hang inflightSegments forever.
85
+ //
86
+ // Fix G: 30s (matching Android and the main-runtime watchdog). The buffered
87
+ // executor stays buffered until the bg ENTRY bundle finishes evaluating; on a
88
+ // slow/throttled cold start that entry eval can itself exceed 10s, which would
89
+ // falsely trip the watchdog on a load that was about to succeed. 30s keeps the
90
+ // genuine-wedge safety net while leaving generous headroom for slow cold starts.
91
+ static const NSTimeInterval kBgSegmentEvalWatchdogSeconds = 30.0;
92
+
93
+ // EBgMgrSegmentEvalError NSError `code` values are declared in
94
+ // BackgroundThreadManager.h so the TurboModule boundary
95
+ // (BackgroundThread.loadSegmentInBackground) can map each distinct code to its
96
+ // own JS reject code by NAME. They stay numerically aligned with
97
+ // SplitBundleLoader's ESegmentEvalError; see installProdBundleLoader.ts for the
98
+ // retryable-vs-fatal classification (H3 / fix E).
23
99
 
24
100
  @interface BackgroundThreadManager ()
25
101
  @property (nonatomic, strong) BackgroundReactNativeDelegate *reactNativeFactoryDelegate;
@@ -201,33 +277,166 @@ static NSString *const MODULE_DEBUG_URL = @"http://localhost:8082/apps/mobile/ba
201
277
  completion:(void (^)(NSError * _Nullable error))completion
202
278
  {
203
279
  if (!self.isStarted || !self.reactNativeFactoryDelegate) {
280
+ // Transient: the bg runtime just isn't up yet. NotStarted → NO_RUNTIME
281
+ // (retryable). Previously a raw `code:1` which fell through the
282
+ // boundary's `default` — same destination, but now NAMED so the mapping
283
+ // is explicit and can never drift (fix 1 / E).
204
284
  NSError *error = [NSError errorWithDomain:@"BackgroundThread"
205
- code:1
285
+ code:EBgMgrSegmentEvalErrorNotStarted
206
286
  userInfo:@{NSLocalizedDescriptionKey: @"Background runtime not started"}];
207
287
  if (completion) completion(error);
208
288
  return;
209
289
  }
210
290
 
211
- // Verify the file exists
291
+ // Verify the file exists. FATAL: a missing segment file is real packaging /
292
+ // OTA corruption — retrying just re-misses. Previously a raw `code:2` that
293
+ // the boundary's `default` MISCLASSIFIED as retryable NO_RUNTIME, masking
294
+ // the corruption; now FileNotFound → NOT_FOUND (fatal) (fix 1 / E — the
295
+ // NO-SHIP blocker).
212
296
  if (![[NSFileManager defaultManager] fileExistsAtPath:path]) {
213
297
  NSError *error = [NSError errorWithDomain:@"BackgroundThread"
214
- code:2
298
+ code:EBgMgrSegmentEvalErrorFileNotFound
215
299
  userInfo:@{NSLocalizedDescriptionKey:
216
300
  [NSString stringWithFormat:@"Segment file not found: %@", path]}];
217
301
  if (completion) completion(error);
218
302
  return;
219
303
  }
220
304
 
221
- BOOL success = [self.reactNativeFactoryDelegate registerSegmentWithId:segmentId path:path];
222
- if (success) {
223
- if (completion) completion(nil);
224
- } else {
305
+ [self evaluateSegmentInBackground:segmentId path:path completion:completion];
306
+ }
307
+
308
+ // Evaluate-then-resolve segment load for the BACKGROUND runtime (H2).
309
+ //
310
+ // WHY THIS REPLACES registerSegmentWithId: + immediate completion(nil):
311
+ // The old path called `[delegate registerSegmentWithId:path:]` (which routes to
312
+ // RCTInstance/ReactInstance::registerSegment — that only ENQUEUES
313
+ // `runtime.evaluateJavaScript(segment)` on the runtime scheduler and returns)
314
+ // then resolved the JS promise immediately. That is the exact
315
+ // "Requiring unknown module" race the MAIN runtime already fixed in
316
+ // SplitBundleLoader: Metro's `import().then(() => __r(moduleId))` microtask can
317
+ // run `__r` BEFORE the scheduled eval populated the module table → a FATAL,
318
+ // uncatchable crash. Locale segments load through THIS path in the bg runtime
319
+ // (ServiceSetting.refreshLocaleMessages → import('./json/*.json') runs in
320
+ // kit-bg), so a language switch could still crash.
321
+ //
322
+ // Fix: evaluate the segment OURSELVES inside one
323
+ // `callFunctionOnBufferedRuntimeExecutor:` block on the bg RCTInstance and
324
+ // signal completion in that SAME block, strictly AFTER eval — making eval +
325
+ // resolve one atomic unit so any subsequent `__r(moduleId)` finds the module.
326
+ // The bg RCTInstance is the delegate's private `_rctInstance` ivar; we read it
327
+ // reflectively (the same pattern installSharedBridgeInMainRuntime: uses on the
328
+ // main host) to keep this fix self-contained in this file rather than widening
329
+ // the delegate's surface. Includes the same exactly-once + watchdog guard as
330
+ // the main-runtime fix (C1) because the bg buffered executor is likewise
331
+ // buffered until the bg entry bundle finishes evaluating in hostDidStart:.
332
+ - (void)evaluateSegmentInBackground:(NSNumber *)segmentId
333
+ path:(NSString *)path
334
+ completion:(void (^)(NSError * _Nullable error))completion
335
+ {
336
+ // Reach the bg RCTInstance via the delegate's `_rctInstance` ivar. `id`
337
+ // (not a typed RCTInstance*) mirrors installSharedBridgeInMainRuntime:'s
338
+ // untyped handling and avoids needing the RCTInstance header here.
339
+ BackgroundReactNativeDelegate *delegate = self.reactNativeFactoryDelegate;
340
+ Ivar ivar = class_getInstanceVariable([delegate class], "_rctInstance");
341
+ if (!ivar) {
342
+ // Loud failure (L7 parity): a future RN/delegate refactor that renames
343
+ // this ivar silently disables ALL bg segment loading — surface it.
344
+ // STRUCTURAL/PERMANENT (not transient): retrying can never recreate a
345
+ // renamed ivar, so this maps to fatal NATIVE_UNAVAILABLE — distinct from
346
+ // the nil-instance case below, which IS transient. Misclassifying this
347
+ // as NO_RUNTIME would make JS retry a permanently-broken reflection.
348
+ [BTLogger error:[NSString stringWithFormat:@"[SplitBundle] FATAL: _rctInstance ivar not found on %@ — bg segment loading is DISABLED.", [delegate class]]];
225
349
  NSError *error = [NSError errorWithDomain:@"BackgroundThread"
226
- code:3
227
- userInfo:@{NSLocalizedDescriptionKey:
228
- @"Failed to register segment in background runtime"}];
350
+ code:EBgMgrSegmentEvalErrorIvarMissing
351
+ userInfo:@{NSLocalizedDescriptionKey: @"_rctInstance ivar not found on bg delegate"}];
352
+ if (completion) completion(error);
353
+ return;
354
+ }
355
+ id instance = object_getIvar(delegate, ivar);
356
+ if (!instance) {
357
+ [BTLogger error:@"[SplitBundle] bg loadSegment: background RCTInstance not available"];
358
+ NSError *error = [NSError errorWithDomain:@"BackgroundThread"
359
+ code:EBgMgrSegmentEvalErrorNilInstance
360
+ userInfo:@{NSLocalizedDescriptionKey: @"Background RCTInstance not available"}];
229
361
  if (completion) completion(error);
362
+ return;
230
363
  }
364
+
365
+ // M4/M5: mmap + zero-copy buffer (retains the NSData for the async block).
366
+ NSError *readError = nil;
367
+ NSData *data = [NSData dataWithContentsOfFile:path
368
+ options:NSDataReadingMappedIfSafe
369
+ error:&readError];
370
+ if (!data || data.length == 0) {
371
+ NSError *error = [NSError errorWithDomain:@"BackgroundThread"
372
+ code:EBgMgrSegmentEvalErrorIORead
373
+ userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithFormat:@"Failed to read bg segment at %@%@", path, readError ? [NSString stringWithFormat:@": %@", readError.localizedDescription] : @""]}];
374
+ if (completion) completion(error);
375
+ return;
376
+ }
377
+
378
+ // M6: synthetic `seg-<id>.js` source URL for in-segment crash symbolication.
379
+ int segIdInt = segmentId.intValue;
380
+ NSString *sourceURL = [NSString stringWithFormat:@"seg-%d.js", segIdInt];
381
+
382
+ // C1: exactly-once guard shared (and retained) by the executor block and the
383
+ // watchdog. ARC-owned so the lock outlives both with no manual free.
384
+ BgMgrSettleGuard *settleGuard = [[BgMgrSettleGuard alloc] init];
385
+
386
+ CFAbsoluteTime dispatchStart = CFAbsoluteTimeGetCurrent();
387
+ [BTLogger info:[NSString stringWithFormat:@"[SplitBundle] bg loadSegment: evaluating %@ (id=%d, %lu bytes)", sourceURL, segIdInt, (unsigned long)data.length]];
388
+
389
+ // No __weak dance needed here (unlike the SharedRPC executor): this block
390
+ // does not re-dispatch onto the instance — it runs synchronously inside the
391
+ // instance's own runtime executor with a live `runtime &`, so by the time it
392
+ // executes the instance is necessarily still alive.
393
+ [instance callFunctionOnBufferedRuntimeExecutor:^(facebook::jsi::Runtime &runtime) {
394
+ @autoreleasepool {
395
+ BOOL won = [settleGuard tryClaim];
396
+ NSError *evalError = nil;
397
+ CFAbsoluteTime evalStart = CFAbsoluteTimeGetCurrent();
398
+ try {
399
+ auto buffer = std::make_shared<BgMgrNSDataJSIBuffer>(data);
400
+ runtime.evaluateJavaScript(buffer, [sourceURL UTF8String]);
401
+ double evalMs = (CFAbsoluteTimeGetCurrent() - evalStart) * 1000.0;
402
+ [BTLogger info:[NSString stringWithFormat:@"[SplitBundle] bg loadSegment: %@ evaluated in %.1fms", sourceURL, evalMs]];
403
+ } catch (const std::exception &e) {
404
+ evalError = [NSError errorWithDomain:@"BackgroundThread"
405
+ code:EBgMgrSegmentEvalErrorEvalThrow
406
+ userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithFormat:@"Bg segment evaluation failed for %@: %s", sourceURL, e.what()]}];
407
+ [BTLogger error:[NSString stringWithFormat:@"[SplitBundle] bg loadSegment: %@ evaluation threw: %s", sourceURL, e.what()]];
408
+ } catch (...) {
409
+ evalError = [NSError errorWithDomain:@"BackgroundThread"
410
+ code:EBgMgrSegmentEvalErrorEvalThrow
411
+ userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithFormat:@"Bg segment evaluation failed for %@ (unknown C++ exception)", sourceURL]}];
412
+ [BTLogger error:[NSString stringWithFormat:@"[SplitBundle] bg loadSegment: %@ evaluation threw an unknown exception", sourceURL]];
413
+ }
414
+ if (won) {
415
+ // Resolve AFTER eval — the ordering guarantee that fixes the race.
416
+ if (completion) completion(evalError);
417
+ } else {
418
+ [BTLogger warn:[NSString stringWithFormat:@"[SplitBundle] bg loadSegment: %@ evaluated AFTER watchdog already settled (bg entry was wedged >%.0fs)", sourceURL, kBgSegmentEvalWatchdogSeconds]];
419
+ }
420
+ }
421
+ }];
422
+
423
+ // C1 watchdog — fires only on a genuine wedge (bg entry bundle never
424
+ // finished evaluating). Rejects with a retryable timeout so the JS loader
425
+ // re-attempts. settleGuard is retained by this block, so the lock stays
426
+ // valid even if the executor block later runs.
427
+ dispatch_after(dispatch_time(DISPATCH_TIME_NOW, (int64_t)(kBgSegmentEvalWatchdogSeconds * NSEC_PER_SEC)),
428
+ dispatch_get_global_queue(QOS_CLASS_UTILITY, 0), ^{
429
+ if ([settleGuard tryClaim]) {
430
+ [BTLogger error:[NSString stringWithFormat:@"[SplitBundle] bg loadSegment: %@ WATCHDOG fired after %.0fs — bg runtime executor never ran (bg entry bundle likely never finished evaluating). Rejecting as retryable timeout.", sourceURL, kBgSegmentEvalWatchdogSeconds]];
431
+ NSError *timeoutError = [NSError errorWithDomain:@"BackgroundThread"
432
+ code:EBgMgrSegmentEvalErrorTimeout
433
+ userInfo:@{NSLocalizedDescriptionKey: [NSString stringWithFormat:@"Bg segment %@ eval timed out after %.0fs (buffered runtime executor never ran)", sourceURL, kBgSegmentEvalWatchdogSeconds]}];
434
+ if (completion) completion(timeoutError);
435
+ }
436
+ });
437
+
438
+ double dispatchMs = (CFAbsoluteTimeGetCurrent() - dispatchStart) * 1000.0;
439
+ [BTLogger info:[NSString stringWithFormat:@"[SplitBundle] bg loadSegment: %@ dispatched in %.1fms (resolve fires after eval; watchdog %.0fs)", sourceURL, dispatchMs, kBgSegmentEvalWatchdogSeconds]];
231
440
  }
232
441
 
233
442
  #pragma mark - Restart
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@onekeyfe/react-native-background-thread",
3
- "version": "3.0.63",
3
+ "version": "3.0.64",
4
4
  "description": "react-native-background-thread",
5
5
  "main": "./lib/module/index.js",
6
6
  "types": "./lib/typescript/src/index.d.ts",