redact_ner 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/Cargo.lock +1 -0
- data/ext/redact_ner/Cargo.toml +6 -0
- data/ext/redact_ner/src/lib.rs +66 -1
- data/lib/redact_ner/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 99ee93baed8bd1952265ca34c45657d848041a2afa3656e96d97bb07c455d1f9
|
|
4
|
+
data.tar.gz: f6d3b0f18c8e392fe8851cf951f3ed091a7d5b9a3d624c7943250b6f7f2736dc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9028c72248a875fb83429a7d0d0a4363e9361eb9725d86c24f66f616487be05c43752e8f6ab5b8c8be0d112d9026180b65845f3082e37279fd3352c348712e6f
|
|
7
|
+
data.tar.gz: cf82b82abe1e921f2da99dd6d872b020317504f5ccf23f88fceb848bb2143730ee914d873a89244d54e316c70365e43b2a4d89ef03f89a6c86f8049bcd9791a7
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.1.3] - 2026-06-05
|
|
11
|
+
|
|
12
|
+
## What's Changed
|
|
13
|
+
* feat: release GVL during ONNX inference by @mitsuru in https://github.com/mitsuru/redact-ner-ruby/pull/7
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
**Full Changelog**: https://github.com/mitsuru/redact-ner-ruby/compare/v0.1.2...v0.1.3
|
|
17
|
+
|
|
18
|
+
|
|
10
19
|
## [0.1.2] - 2026-05-17
|
|
11
20
|
|
|
12
21
|
## What's Changed
|
|
@@ -52,7 +61,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
52
61
|
- The precompiled musl gems (Alpine / distroless) link `libstdc++`
|
|
53
62
|
dynamically; install it at runtime, e.g. `apk add --no-cache libstdc++`.
|
|
54
63
|
|
|
55
|
-
[Unreleased]: https://github.com/mitsuru/redact-ner-ruby/compare/v0.1.2...HEAD
|
|
64
|
+
[Unreleased]: https://github.com/mitsuru/redact-ner-ruby/compare/v0.1.3...HEAD
|
|
56
65
|
[0.1.1]: https://github.com/mitsuru/redact-ner-ruby/releases/tag/v0.1.1
|
|
57
66
|
[0.1.0]: https://github.com/mitsuru/redact-ner-ruby/releases/tag/v0.1.0
|
|
58
67
|
[0.1.2]: https://github.com/mitsuru/redact-ner-ruby/releases/tag/v0.1.2
|
|
68
|
+
[0.1.3]: https://github.com/mitsuru/redact-ner-ruby/releases/tag/v0.1.3
|
data/Cargo.lock
CHANGED
data/ext/redact_ner/Cargo.toml
CHANGED
|
@@ -13,6 +13,12 @@ magnus = "0.8"
|
|
|
13
13
|
redact-ner = "0.8.3"
|
|
14
14
|
redact-core = "0.8.3"
|
|
15
15
|
|
|
16
|
+
# Direct access to rb_thread_call_without_gvl so analyze() can release the GVL
|
|
17
|
+
# during ONNX inference. magnus 0.8 exposes no without_gvl wrapper. rb-sys is
|
|
18
|
+
# already pulled in transitively by magnus; default-features = false keeps
|
|
19
|
+
# Cargo's resolved feature set identical to magnus's (no ABI drift).
|
|
20
|
+
rb-sys = { version = ">=0.9.113", default-features = false }
|
|
21
|
+
|
|
16
22
|
# Force a vendored, statically-linked OpenSSL so cross-compilation does not
|
|
17
23
|
# depend on a per-target system OpenSSL. openssl-sys is pulled transitively
|
|
18
24
|
# via ort -> ort-sys -> ureq -> native-tls; we only pin its build mode here.
|
data/ext/redact_ner/src/lib.rs
CHANGED
|
@@ -1,6 +1,58 @@
|
|
|
1
1
|
use magnus::{function, method, prelude::*, Error, RArray, Ruby};
|
|
2
2
|
use redact_core::Recognizer as _;
|
|
3
3
|
use redact_ner::NerRecognizer;
|
|
4
|
+
use std::os::raw::c_void;
|
|
5
|
+
|
|
6
|
+
/// Run `f` with the GVL released, then re-acquire it before returning.
|
|
7
|
+
///
|
|
8
|
+
/// SAFETY CONTRACT: `f` MUST NOT touch any Ruby object or call any Ruby API —
|
|
9
|
+
/// no other Ruby C function is safe to call while the GVL is released. It is
|
|
10
|
+
/// for pure-Rust, CPU-bound (or sleeping) work only.
|
|
11
|
+
///
|
|
12
|
+
/// `f` runs synchronously on the *same* OS thread (the call blocks until it
|
|
13
|
+
/// returns), so borrows captured by `f` stay valid and no `Send` bound is
|
|
14
|
+
/// needed. Panics are caught and resumed after the GVL is re-acquired so we
|
|
15
|
+
/// never unwind across the C boundary. No unblock function is passed (NULL
|
|
16
|
+
/// ubf), so the native call is not interruptible by Thread#kill/Timeout —
|
|
17
|
+
/// acceptable because ONNX inference has no clean cancellation point.
|
|
18
|
+
fn nogvl<F, R>(f: F) -> R
|
|
19
|
+
where
|
|
20
|
+
F: FnOnce() -> R,
|
|
21
|
+
{
|
|
22
|
+
struct Data<F, R> {
|
|
23
|
+
func: Option<F>,
|
|
24
|
+
result: Option<std::thread::Result<R>>,
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
unsafe extern "C" fn trampoline<F, R>(arg: *mut c_void) -> *mut c_void
|
|
28
|
+
where
|
|
29
|
+
F: FnOnce() -> R,
|
|
30
|
+
{
|
|
31
|
+
let data = &mut *(arg as *mut Data<F, R>);
|
|
32
|
+
let func = data.func.take().expect("nogvl closure run twice");
|
|
33
|
+
data.result = Some(std::panic::catch_unwind(std::panic::AssertUnwindSafe(func)));
|
|
34
|
+
std::ptr::null_mut()
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
let mut data: Data<F, R> = Data {
|
|
38
|
+
func: Some(f),
|
|
39
|
+
result: None,
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
unsafe {
|
|
43
|
+
rb_sys::rb_thread_call_without_gvl(
|
|
44
|
+
Some(trampoline::<F, R>),
|
|
45
|
+
&mut data as *mut _ as *mut c_void,
|
|
46
|
+
None,
|
|
47
|
+
std::ptr::null_mut(),
|
|
48
|
+
);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
match data.result.take().expect("nogvl closure did not run") {
|
|
52
|
+
Ok(value) => value,
|
|
53
|
+
Err(panic) => std::panic::resume_unwind(panic),
|
|
54
|
+
}
|
|
55
|
+
}
|
|
4
56
|
|
|
5
57
|
#[magnus::wrap(class = "RedactNer::Recognizer", free_immediately, size)]
|
|
6
58
|
struct RbRecognizer {
|
|
@@ -26,8 +78,13 @@ impl RbRecognizer {
|
|
|
26
78
|
}
|
|
27
79
|
|
|
28
80
|
fn analyze_raw(&self, text: String, language: String) -> Result<RArray, Error> {
|
|
81
|
+
// ONNX inference is CPU-bound and touches no Ruby objects, so run it
|
|
82
|
+
// with the GVL released to let other Ruby threads make progress.
|
|
83
|
+
let results = nogvl(|| self.inner.analyze(&text, &language)).map_err(runtime_error)?;
|
|
84
|
+
|
|
85
|
+
// Building the Ruby Array below DOES touch Ruby, so it runs with the
|
|
86
|
+
// GVL held (we are back in normal extension context here).
|
|
29
87
|
let ruby = Ruby::get().map_err(runtime_error)?;
|
|
30
|
-
let results = self.inner.analyze(&text, &language).map_err(runtime_error)?;
|
|
31
88
|
|
|
32
89
|
let arr = ruby.ary_new_capa(results.len());
|
|
33
90
|
for r in results {
|
|
@@ -74,6 +131,13 @@ impl RbRecognizer {
|
|
|
74
131
|
.collect()
|
|
75
132
|
}
|
|
76
133
|
|
|
134
|
+
// Test-only probe used by test/test_gvl_release.rb to prove the GVL is
|
|
135
|
+
// released during native CPU-bound work. Sleeps inside the same code path
|
|
136
|
+
// that wraps inference. NOT part of the public API (leading underscore).
|
|
137
|
+
fn nogvl_sleep_ms(&self, ms: u64) {
|
|
138
|
+
nogvl(|| std::thread::sleep(std::time::Duration::from_millis(ms)));
|
|
139
|
+
}
|
|
140
|
+
|
|
77
141
|
fn min_confidence(&self) -> f32 {
|
|
78
142
|
self.inner.config().min_confidence
|
|
79
143
|
}
|
|
@@ -105,6 +169,7 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
105
169
|
"supported_entities",
|
|
106
170
|
method!(RbRecognizer::supported_entities, 0),
|
|
107
171
|
)?;
|
|
172
|
+
class.define_method("_nogvl_sleep_ms", method!(RbRecognizer::nogvl_sleep_ms, 1))?;
|
|
108
173
|
class.define_method("min_confidence", method!(RbRecognizer::min_confidence, 0))?;
|
|
109
174
|
class.define_method("max_seq_length", method!(RbRecognizer::max_seq_length, 0))?;
|
|
110
175
|
class.define_method("model_path", method!(RbRecognizer::model_path, 0))?;
|
data/lib/redact_ner/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: redact_ner
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Mitsuru Hayasaka
|
|
@@ -95,7 +95,7 @@ metadata:
|
|
|
95
95
|
source_code_uri: https://github.com/mitsuru/redact-ner-ruby
|
|
96
96
|
bug_tracker_uri: https://github.com/mitsuru/redact-ner-ruby/issues
|
|
97
97
|
changelog_uri: https://github.com/mitsuru/redact-ner-ruby/blob/main/CHANGELOG.md
|
|
98
|
-
documentation_uri: https://rubydoc.info/gems/redact_ner/0.1.2
|
|
98
|
+
documentation_uri: https://rubydoc.info/gems/redact_ner/0.1.3
|
|
99
99
|
rubygems_mfa_required: 'true'
|
|
100
100
|
rdoc_options: []
|
|
101
101
|
require_paths:
|