@temporalio/core-bridge 0.20.0 → 0.20.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@temporalio/core-bridge",
3
- "version": "0.20.0",
3
+ "version": "0.20.1",
4
4
  "description": "Temporal.io SDK Core<>Node bridge",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -43,5 +43,5 @@
43
43
  "publishConfig": {
44
44
  "access": "public"
45
45
  },
46
- "gitHead": "e6b7468a00c68efd4baebbf866cf0a28c150bb6b"
46
+ "gitHead": "a9fc2f32f9e1624758f334319e2135f25ff9ed24"
47
47
  }
@@ -8,6 +8,7 @@ use crate::{
8
8
  };
9
9
  use futures::future::{BoxFuture, FutureExt};
10
10
  use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard};
11
+ use std::sync::Arc;
11
12
  use std::{
12
13
  collections::HashMap,
13
14
  fmt::Debug,
@@ -22,7 +23,7 @@ pub(crate) struct WorkflowConcurrencyManager {
22
23
  }
23
24
 
24
25
  struct ManagedRun {
25
- wfm: Mutex<WorkflowManager>,
26
+ wfm: Arc<Mutex<WorkflowManager>>,
26
27
  wft: Option<OutstandingTask>,
27
28
  activation: Option<OutstandingActivation>,
28
29
  metrics: MetricsContext,
@@ -36,7 +37,7 @@ struct ManagedRun {
36
37
  impl ManagedRun {
37
38
  fn new(wfm: WorkflowManager, metrics: MetricsContext) -> Self {
38
39
  Self {
39
- wfm: Mutex::new(wfm),
40
+ wfm: Arc::new(Mutex::new(wfm)),
40
41
  wft: None,
41
42
  activation: None,
42
43
  metrics,
@@ -266,16 +267,19 @@ impl WorkflowConcurrencyManager {
266
267
  F: for<'a> FnOnce(&'a mut WorkflowManager) -> BoxFuture<Result<Fout>>,
267
268
  Fout: Send + Debug,
268
269
  {
269
- let readlock = self.runs.read();
270
- let m = readlock
271
- .get(run_id)
272
- .ok_or_else(|| WFMachinesError::Fatal("Missing workflow machines".to_string()))?;
273
- // This holds a non-async mutex across an await point which is technically a no-no, but
274
- // we never access the machines for the same run simultaneously anyway. This should all
275
- // get fixed with a generally different approach which moves the runs inside workers.
276
- let mut wfm_mutex = m.wfm.lock();
277
- let res = mutator(&mut wfm_mutex).await;
270
+ // TODO: Slightly less than ideal. We must avoid holding the read lock on the overall
271
+ // machine map while async-ly mutating the inner machine. So, we clone the inner ArcMutex.
272
+ // We should restructure things to avoid the top-level lock on the map.
273
+
274
+ let wfm = {
275
+ let readlock = self.runs.read();
276
+ let m = readlock
277
+ .get(run_id)
278
+ .ok_or_else(|| WFMachinesError::Fatal("Missing workflow machines".to_string()))?;
279
+ m.wfm.clone()
280
+ };
278
281
 
282
+ let res = mutator(&mut wfm.lock()).await;
279
283
  res
280
284
  }
281
285
 
@@ -321,6 +325,8 @@ impl WorkflowConcurrencyManager {
321
325
  #[cfg(test)]
322
326
  mod tests {
323
327
  use super::*;
328
+ use crate::test_help::canned_histories;
329
+ use tokio::sync::Barrier;
324
330
 
325
331
  // We test mostly error paths here since the happy paths are well covered by the tests of the
326
332
  // core sdk itself, and setting up the fake data is onerous here. If we make the concurrency
@@ -342,4 +348,57 @@ mod tests {
342
348
  // Should whine that the machines have nothing to do (history empty)
343
349
  assert_matches!(res.unwrap_err(), WFMachinesError::Fatal { .. });
344
350
  }
351
+
352
+ /// This test makes sure that if we're stuck on an await within the machine mutator we don't
353
+ /// cause a deadlock if a write happens during that. This test will hang without proper
354
+ /// implementation.
355
+ #[tokio::test]
356
+ async fn aba_deadlock_prevented() {
357
+ let run_id = "some_run_id";
358
+ let timer_hist = canned_histories::single_timer("t");
359
+ let access_barr: &'static Barrier = Box::leak(Box::new(Barrier::new(2)));
360
+ let wft = timer_hist.get_history_info(1).unwrap();
361
+
362
+ let mgr = WorkflowConcurrencyManager::new();
363
+ mgr.create_or_update(
364
+ run_id,
365
+ wft.clone().into(),
366
+ "fake_wf_id",
367
+ "fake_namespace",
368
+ "fake_wf_type",
369
+ &Default::default(),
370
+ )
371
+ .await
372
+ .unwrap();
373
+ // Perform access which blocks
374
+ let access_fut = mgr.access(run_id, |_wfm| {
375
+ async {
376
+ // Wait to make sure access has started
377
+ access_barr.wait().await;
378
+ // Wait to make sure write has finished
379
+ access_barr.wait().await;
380
+ Ok(())
381
+ }
382
+ .boxed()
383
+ });
384
+ let write_fut = async {
385
+ // Wait to make sure access has started
386
+ access_barr.wait().await;
387
+ // Now try writing
388
+ mgr.create_or_update(
389
+ "different_run_id",
390
+ wft.clone().into(),
391
+ "fake_wf_id",
392
+ "fake_namespace",
393
+ "fake_wf_type",
394
+ &Default::default(),
395
+ )
396
+ .await
397
+ .unwrap();
398
+ // Indicate write has finished
399
+ access_barr.wait().await;
400
+ };
401
+ let (r1, _) = tokio::join!(access_fut, write_fut);
402
+ r1.unwrap();
403
+ }
345
404
  }