disarm 0.10.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +101 -0
- data/lib/disarm/3.1/disarm.so +0 -0
- data/lib/disarm/3.2/disarm.so +0 -0
- data/lib/disarm/3.3/disarm.so +0 -0
- data/lib/disarm/version.rb +6 -0
- data/lib/disarm.rb +142 -0
- metadata +104 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: c0514babf96beba18a192fbca1a48ac2d557418cbb3e6637a9fa1b4087d6f5f1
|
|
4
|
+
data.tar.gz: 9f6fe6a9da7132a7ccbe503f7a5a57101da291edbb9bac0edf41b40232c208e7
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7b037d86d3dece4e7115ae6f097fdf20fd15f01a24373f1e530ff87bc3e574047e56bfb58359f7368864ebb99a5b79570bbbe6eb98e6c9fa8e35739144a822c1
|
|
7
|
+
data.tar.gz: d2e33ede6d8f214013ebc7d580325e88410f633b1e1c2759b47c721f8fcfa45a2e0f2dc02995da49b9474ede14ae9c23cc2fbb5caafef59d1547bff9b9cf25ca
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Richard Quinn
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# disarm (Ruby)
|
|
2
|
+
|
|
3
|
+
Ruby bindings for [**disarm**](https://github.com/raeq/disarm) — Unicode
|
|
4
|
+
confusable / text-security building blocks (homoglyph & bidi & zalgo handling,
|
|
5
|
+
plus standards-based transliteration), powered by Rust.
|
|
6
|
+
|
|
7
|
+
The native extension wraps the **pure-Rust `disarm` core** (no Python), via
|
|
8
|
+
[magnus](https://github.com/matsadler/magnus) + [rb-sys](https://github.com/oxidize-rb/rb-sys).
|
|
9
|
+
Precompiled platform gems install without a local Rust toolchain.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```ruby
|
|
14
|
+
# Gemfile
|
|
15
|
+
gem "disarm"
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
```sh
|
|
19
|
+
gem install disarm
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Requires Ruby >= 3.1. `gem install disarm` pulls a precompiled platform gem
|
|
23
|
+
(Linux x86_64/aarch64, macOS x86_64/arm64, Windows) when one is available, and
|
|
24
|
+
falls back to compiling from source (needs a Rust toolchain) otherwise.
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
```ruby
|
|
29
|
+
require "disarm"
|
|
30
|
+
|
|
31
|
+
# Standards-based transliteration to ASCII. `scheme:` is a symbol (or string):
|
|
32
|
+
# :default (general-purpose), :strict_iso9 (ISO 9:1995), :gost7034.
|
|
33
|
+
Disarm.transliterate("Москва") # => "Moskva"
|
|
34
|
+
Disarm.transliterate("Москва", scheme: :strict_iso9)
|
|
35
|
+
|
|
36
|
+
# TR39 confusable folding (homoglyph defense). `target:` defaults to :latin.
|
|
37
|
+
Disarm.normalize_confusables("раypal") # => "paypal"
|
|
38
|
+
Disarm.confusable?("pаypal") # => true
|
|
39
|
+
Disarm.normalize_confusables("paypal", target: :cyrillic)
|
|
40
|
+
|
|
41
|
+
# Canonicalization primitives
|
|
42
|
+
Disarm.strip_accents("café") # => "cafe"
|
|
43
|
+
Disarm.fold_case("HELLO") # => "hello"
|
|
44
|
+
Disarm.slugify("Héllo Wörld") # => "hello-world"
|
|
45
|
+
Disarm.slugify("Hello World", separator: "_", max_length: 5, word_boundary: true)
|
|
46
|
+
Disarm.demojize("I ❤️ Ruby") # => "I red heart Ruby"
|
|
47
|
+
Disarm.demojize("👍🏽", strip_modifiers: true)
|
|
48
|
+
|
|
49
|
+
# Security presets
|
|
50
|
+
Disarm.strip_obfuscation("Ѕ𝗲𝗰𝗿𝗲𝘁 data") # deobfuscated
|
|
51
|
+
Disarm.security_clean("…") # homoglyph/bidi/zero-width clean
|
|
52
|
+
|
|
53
|
+
# IDN / hostname spoof check (a false result is not a safety guarantee)
|
|
54
|
+
Disarm.suspicious_hostname?("pаypal.com") # => true (Cyrillic 'а')
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Every option past the text is a keyword argument with the core's default, and
|
|
58
|
+
scheme/target tokens accept symbols or strings. `slugify` exposes the core's
|
|
59
|
+
`SlugConfig` surface (`separator:`, `lowercase:`, `max_length:`,
|
|
60
|
+
`word_boundary:`, `save_order:`, `stopwords:`, `allow_unicode:`, `lang:`,
|
|
61
|
+
`entities:`, `decimal:`, `hexadecimal:`, `safe_chars:`).
|
|
62
|
+
|
|
63
|
+
### Errors
|
|
64
|
+
|
|
65
|
+
Every public method is wrapped so that everything disarm raises descends from
|
|
66
|
+
`Disarm::Error < StandardError` — a single `rescue Disarm::Error` catches all of
|
|
67
|
+
them. An invalid scheme/target, a non-String argument, or any other bad input
|
|
68
|
+
raises the more specific `Disarm::InvalidArgument`; the original backtrace is
|
|
69
|
+
preserved so the failing call site stays visible.
|
|
70
|
+
|
|
71
|
+
```ruby
|
|
72
|
+
begin
|
|
73
|
+
Disarm.transliterate("x", scheme: :klingon)
|
|
74
|
+
rescue Disarm::InvalidArgument => e # also rescuable as Disarm::Error
|
|
75
|
+
warn e.message
|
|
76
|
+
end
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Security posture
|
|
80
|
+
|
|
81
|
+
This binding inherits the core's guarantees and limitations verbatim — it adds
|
|
82
|
+
no logic of its own. disarm is an **input-normalization** layer, not an output
|
|
83
|
+
sanitizer; read the [Threat Model](https://github.com/raeq/disarm/blob/main/THREAT_MODEL.md)
|
|
84
|
+
before relying on it in a security context.
|
|
85
|
+
|
|
86
|
+
## Development
|
|
87
|
+
|
|
88
|
+
```sh
|
|
89
|
+
cd bindings/ruby
|
|
90
|
+
bundle install
|
|
91
|
+
bundle exec rake compile # builds the native ext for the host platform
|
|
92
|
+
bundle exec rake spec # runs the RSpec suite against it
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
`bundle exec rake compile` requires a Rust toolchain (the core is a path
|
|
96
|
+
dependency until disarm 0.10 is published to crates.io). Cross-platform release
|
|
97
|
+
gems are built in CI with `rb-sys-dock`.
|
|
98
|
+
|
|
99
|
+
## License
|
|
100
|
+
|
|
101
|
+
MIT — same as the disarm core.
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/disarm.rb
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "disarm/version"
|
|
4
|
+
|
|
5
|
+
# Native extension bootstrap.
#
# Two layouts exist: a precompiled platform gem ships one build per Ruby
# minor version (e.g. lib/disarm/3.3/disarm.so), whereas a source gem compiles
# a single lib/disarm/disarm.so. The version-specific build is attempted first.
begin
  major_minor = RUBY_VERSION.slice(/\d+\.\d+/)
  require_relative "disarm/#{major_minor}/disarm"
rescue LoadError => load_error
  # The unversioned (source-gem) path is only a substitute for a versioned
  # file that does not exist. If the versioned ext *was* found but failed to
  # load (missing dependent shared library, undefined symbol, …), re-raise the
  # original error untouched rather than hiding it behind the fallback.
  versioned_file_missing = load_error.message.include?("cannot load such file")
  raise unless versioned_file_missing

  require_relative "disarm/disarm"
end
|
|
20
|
+
|
|
21
|
+
# The native extension (ext/disarm) supplies the raw `_`-prefixed shims that
# every method below delegates to. This file adds the idiomatic Ruby surface on
# top (#357): keyword arguments with the core's defaults, symbol tokens
# (:latin, :default, …), a single transliterate(text, scheme:) entrypoint, and
# a Disarm::Error hierarchy. The no-option methods (strip_accents, fold_case,
# suspicious_hostname?) are wrapped as well, purely so that their errors go
# through the same translation. Each method is still a thin wrapper over the
# pure-Rust `disarm` core.
module Disarm
  # Base class for every error disarm raises, so consumers can `rescue
  # Disarm::Error`. The native shim raises Ruby's built-in exceptions; the
  # wrappers below translate those into this hierarchy.
  class Error < StandardError; end

  # Raised for an invalid argument — an unknown scheme/target token, a
  # malformed option, etc. (the core's `ErrorKind::InvalidArgument`).
  class InvalidArgument < Error; end

  class << self
    # Transliterate Unicode text to ASCII.
    #
    # `scheme:` selects the standard: :default (the general-purpose scheme),
    # :strict_iso9, or :gost7034. Accepts a String or Symbol.
    #
    # Raises Disarm::InvalidArgument when the native layer rejects the
    # arguments (e.g. an unknown scheme).
    def transliterate(text, scheme: :default)
      scheme = scheme.to_s
      translate_errors do
        # The bare default keeps the core's borrow-on-no-op fast path.
        scheme == "default" ? _transliterate(text) : _transliterate_scheme(text, scheme)
      end
    end

    # Fold cross-script confusables toward `target:` (:latin or :cyrillic).
    # The token may be a Symbol or a String; it is stringified before the
    # native call.
    def normalize_confusables(text, target: :latin)
      translate_errors { _normalize_confusables(text, target.to_s) }
    end

    # Whether `text` contains a character confusable with `target:` (:latin or
    # :cyrillic).
    def confusable?(text, target: :latin)
      translate_errors { _confusable?(text, target.to_s) }
    end

    # Generate a URL-safe slug. Mirrors the core's `SlugConfig` defaults; every
    # option past `text` is keyword-only and is forwarded positionally to the
    # native shim in the order listed here. (`regex_pattern`/`replacements` are
    # not surfaced yet — see ext/disarm/src/lib.rs.)
    def slugify(
      text,
      separator: "-",
      lowercase: true,
      max_length: 0,
      word_boundary: false,
      save_order: false,
      stopwords: [],
      allow_unicode: false,
      lang: nil,
      entities: true,
      decimal: true,
      hexadecimal: true,
      safe_chars: ""
    )
      translate_errors do
        # `Array(stopwords)` tolerates the common `stopwords: nil` (and a bare
        # String) instead of raising NoMethodError on `.map`.
        _slugify(
          text, separator.to_s, lowercase, max_length, word_boundary, save_order,
          Array(stopwords).map(&:to_s), allow_unicode, lang&.to_s, entities, decimal,
          hexadecimal, safe_chars.to_s
        )
      end
    end

    # Replace emoji with their plain names (e.g. "👍" → "thumbs up").
    # `strip_modifiers:` drops skin-tone / variation modifiers before naming.
    def demojize(text, strip_modifiers: false)
      translate_errors { _demojize(text, strip_modifiers) }
    end

    # Remove obfuscation (zero-width, bidi, combining-mark abuse) while keeping
    # legible content.
    def strip_obfuscation(text)
      translate_errors { _strip_obfuscation(text) }
    end

    # Aggressive security cleaning: strip obfuscation, control characters, and
    # other spoofing vectors.
    def security_clean(text)
      translate_errors { _security_clean(text) }
    end

    # Strip diacritics ("café" → "cafe").
    def strip_accents(text)
      translate_errors { _strip_accents(text) }
    end

    # Unicode case-fold ("HELLO" → "hello").
    def fold_case(text)
      translate_errors { _fold_case(text) }
    end

    # Whether the hostname looks like a mixed-script / confusable IDN spoof. A
    # false result asserts nothing was *found*, not that the host is safe.
    def suspicious_hostname?(host)
      translate_errors { _suspicious_hostname?(host) }
    end

    private

    # Run a native call (the given block), re-raising its built-in exception as
    # the matching Disarm::Error subclass so callers can `rescue Disarm::Error`
    # across the whole surface. Returns the block's value when nothing is
    # raised.
    #
    # The original backtrace is preserved (passed as the third `raise`
    # argument) so the failing native call site stays visible.
    #
    # Bad input maps to Disarm::InvalidArgument. It can arrive as:
    #   * ArgumentError — an invalid scheme/target
    #   * TypeError     — a non-String argument
    #   * RangeError    — e.g. a negative max_length
    #   * EncodingError — Ruby's class for invalid byte sequences; presumably
    #     what the native String conversion raises for text that is not valid
    #     UTF-8 (NOTE(review): confirm against ext/disarm). Without this
    #     mapping such an error would escape the Disarm::Error hierarchy.
    # A RuntimeError maps to the base Disarm::Error.
    def translate_errors
      yield
    rescue Error
      raise # already in our hierarchy — don't re-wrap
    rescue ::ArgumentError, ::TypeError, ::RangeError, ::EncodingError => e
      raise InvalidArgument, e.message, e.backtrace
    rescue ::RuntimeError => e
      raise Error, e.message, e.backtrace
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: disarm
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.10.0
|
|
5
|
+
platform: aarch64-linux
|
|
6
|
+
authors:
|
|
7
|
+
- Richard Quinn
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-06-15 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rake
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '13.0'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '13.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake-compiler
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '1.2'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '1.2'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rspec
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '3.0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '3.0'
|
|
55
|
+
description: |
|
|
56
|
+
Ruby bindings for the disarm Rust core: TR39 confusable folding, bidi/zalgo/
|
|
57
|
+
zero-width neutralization, Unicode normalization, standards-based
|
|
58
|
+
transliteration, slugification, and IDN/hostname spoof detection. The native
|
|
59
|
+
extension wraps the pure-Rust core (no Python), so precompiled platform gems
|
|
60
|
+
run without a local Rust toolchain.
|
|
61
|
+
email:
|
|
62
|
+
- quinn.richard@gmail.com
|
|
63
|
+
executables: []
|
|
64
|
+
extensions: []
|
|
65
|
+
extra_rdoc_files: []
|
|
66
|
+
files:
|
|
67
|
+
- LICENSE
|
|
68
|
+
- README.md
|
|
69
|
+
- lib/disarm.rb
|
|
70
|
+
- lib/disarm/3.1/disarm.so
|
|
71
|
+
- lib/disarm/3.2/disarm.so
|
|
72
|
+
- lib/disarm/3.3/disarm.so
|
|
73
|
+
- lib/disarm/version.rb
|
|
74
|
+
homepage: https://github.com/raeq/disarm
|
|
75
|
+
licenses:
|
|
76
|
+
- MIT
|
|
77
|
+
metadata:
|
|
78
|
+
homepage_uri: https://github.com/raeq/disarm
|
|
79
|
+
source_code_uri: https://github.com/raeq/disarm
|
|
80
|
+
documentation_uri: https://docs.disarm.dev
|
|
81
|
+
rubygems_mfa_required: 'true'
|
|
82
|
+
post_install_message:
|
|
83
|
+
rdoc_options: []
|
|
84
|
+
require_paths:
|
|
85
|
+
- lib
|
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
87
|
+
requirements:
|
|
88
|
+
- - ">="
|
|
89
|
+
- !ruby/object:Gem::Version
|
|
90
|
+
version: '3.1'
|
|
91
|
+
- - "<"
|
|
92
|
+
- !ruby/object:Gem::Version
|
|
93
|
+
version: 3.4.dev
|
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
95
|
+
requirements:
|
|
96
|
+
- - ">="
|
|
97
|
+
- !ruby/object:Gem::Version
|
|
98
|
+
version: 3.3.22
|
|
99
|
+
requirements: []
|
|
100
|
+
rubygems_version: 3.5.23
|
|
101
|
+
signing_key:
|
|
102
|
+
specification_version: 4
|
|
103
|
+
summary: Unicode confusable/text-security building blocks, powered by Rust
|
|
104
|
+
test_files: []
|