disarm 0.10.0-x86_64-linux → 0.11.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/lib/disarm/3.1/disarm.so +0 -0
- data/lib/disarm/3.2/disarm.so +0 -0
- data/lib/disarm/3.3/disarm.so +0 -0
- data/lib/disarm/version.rb +1 -1
- data/lib/disarm.rb +308 -10
- metadata +30 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '0359982965e26fe7e2dd9a466af41416ba547af6524517e51b3311271f007248'
|
|
4
|
+
data.tar.gz: e4de9c08cc791e099176b09c19b08b1876f22fde7954cac5b5291d7758c6db86
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 39c2d59de362a75f6198e59eadfb4ca47a45644b9cce3b303d39c95566e38e29ee2090016897ea856f2d62a5de39c54db4f5476176ecf2ee4047a81cd137d0ed
|
|
7
|
+
data.tar.gz: 2d250406c451bb1435c790b3508d99a95a4434d743751bc4056f6682ec927706954235921b8a389027b65137bad864b5a5df82746a9067d588e10d43bce13b98
|
data/README.md
CHANGED
|
@@ -29,8 +29,10 @@ falls back to compiling from source (needs a Rust toolchain) otherwise.
|
|
|
29
29
|
require "disarm"
|
|
30
30
|
|
|
31
31
|
# Standards-based transliteration to ASCII. `scheme:` is a symbol (or string):
|
|
32
|
-
# :default (general-purpose), :strict_iso9 (ISO 9:1995), :gost7034.
|
|
32
|
+
# :default (general-purpose), :strict_iso9 (ISO 9:1995), :gost7034. `lang:`
|
|
33
|
+
# applies a language profile on top (e.g. "uk" → Київ → "Kyiv").
|
|
33
34
|
Disarm.transliterate("Москва") # => "Moskva"
|
|
35
|
+
Disarm.transliterate("Київ", lang: :uk) # => "Kyiv"
|
|
34
36
|
Disarm.transliterate("Москва", scheme: :strict_iso9)
|
|
35
37
|
|
|
36
38
|
# TR39 confusable folding (homoglyph defense). `target:` defaults to :latin.
|
|
@@ -48,7 +50,7 @@ Disarm.demojize("👍🏽", strip_modifiers: true)
|
|
|
48
50
|
|
|
49
51
|
# Security presets
|
|
50
52
|
Disarm.strip_obfuscation("Ѕ𝗲𝗰𝗿𝗲𝘁 data") # deobfuscated
|
|
51
|
-
Disarm.
|
|
53
|
+
Disarm.canonicalize("…") # homoglyph/bidi/zero-width clean
|
|
52
54
|
|
|
53
55
|
# IDN / hostname spoof check (a false result is not a safety guarantee)
|
|
54
56
|
Disarm.suspicious_hostname?("pаypal.com") # => true (Cyrillic 'а')
|
data/lib/disarm/3.1/disarm.so
CHANGED
|
Binary file
|
data/lib/disarm/3.2/disarm.so
CHANGED
|
Binary file
|
data/lib/disarm/3.3/disarm.so
CHANGED
|
Binary file
|
data/lib/disarm/version.rb
CHANGED
data/lib/disarm.rb
CHANGED
|
@@ -36,13 +36,20 @@ module Disarm
|
|
|
36
36
|
|
|
37
37
|
class << self
|
|
38
38
|
# Transliterate Unicode text to ASCII. `scheme:` selects the standard:
|
|
39
|
-
# :default (the general-purpose scheme), :strict_iso9, or :gost7034.
|
|
40
|
-
# a
|
|
41
|
-
|
|
39
|
+
# :default (the general-purpose scheme), :strict_iso9, or :gost7034. `lang:`
|
|
40
|
+
# applies a language profile on top of the scheme (e.g. "uk" → Київ → "Kyiv",
|
|
41
|
+
# "de" → ü → "ue"); nil means no profile. Both accept a String or Symbol.
|
|
42
|
+
def transliterate(text, scheme: :default, lang: nil)
|
|
42
43
|
scheme = scheme.to_s
|
|
44
|
+
lang = lang&.to_s
|
|
43
45
|
translate_errors do
|
|
44
|
-
# The bare default keeps the core's borrow-on-no-op fast
|
|
45
|
-
scheme
|
|
46
|
+
# The bare default with no profile keeps the core's borrow-on-no-op fast
|
|
47
|
+
# path; any scheme or lang takes the option-carrying builder path.
|
|
48
|
+
if lang.nil? && scheme == "default"
|
|
49
|
+
_transliterate(text)
|
|
50
|
+
else
|
|
51
|
+
_transliterate_opts(text, scheme, lang)
|
|
52
|
+
end
|
|
46
53
|
end
|
|
47
54
|
end
|
|
48
55
|
|
|
@@ -98,10 +105,40 @@ module Disarm
|
|
|
98
105
|
translate_errors { _strip_obfuscation(text) }
|
|
99
106
|
end
|
|
100
107
|
|
|
101
|
-
#
|
|
102
|
-
# other spoofing vectors.
|
|
108
|
+
# Canonicalize text for security-sensitive comparison: strip obfuscation,
|
|
109
|
+
# control characters, and other spoofing vectors. The name describes the
|
|
110
|
+
# mechanism (Unicode canonicalization for matching), not a safety guarantee —
|
|
111
|
+
# this is not an output sanitizer; encode at the sink.
|
|
112
|
+
def canonicalize(text)
|
|
113
|
+
translate_errors { _canonicalize(text) }
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# @deprecated Renamed to {#canonicalize} in 0.11 (the +_clean+ name
|
|
117
|
+
# overpromised safety); removed in 1.0.
|
|
103
118
|
def security_clean(text)
|
|
104
|
-
|
|
119
|
+
warn("[disarm] security_clean is deprecated; use canonicalize (removed in 1.0)", category: :deprecated)
|
|
120
|
+
canonicalize(text)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
# Case/accent/script-insensitive search lookup key. `lang:` applies a
|
|
124
|
+
# language profile for transliteration (e.g. "ru", "uk"); nil means none.
|
|
125
|
+
# Raises Disarm::InvalidArgument on an unknown lang.
|
|
126
|
+
def search_key(text, lang: nil)
|
|
127
|
+
translate_errors { _search_key(text, lang&.to_s) }
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Collation sort key (like #search_key, but keeps base accented characters
|
|
131
|
+
# for correct ordering). `lang:` applies a language profile; nil means none.
|
|
132
|
+
# Raises Disarm::InvalidArgument on an unknown lang.
|
|
133
|
+
def sort_key(text, lang: nil)
|
|
134
|
+
translate_errors { _sort_key(text, lang&.to_s) }
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
# Library catalog deduplication key (search_key plus confusable folding).
|
|
138
|
+
# `lang:` applies a language profile; `strict_iso9:` selects the ISO 9:1995
|
|
139
|
+
# Cyrillic scheme. Raises Disarm::InvalidArgument on an unknown lang.
|
|
140
|
+
def catalog_key(text, lang: nil, strict_iso9: false)
|
|
141
|
+
translate_errors { _catalog_key(text, lang&.to_s, strict_iso9) }
|
|
105
142
|
end
|
|
106
143
|
|
|
107
144
|
# Strip diacritics ("café" → "cafe").
|
|
@@ -114,14 +151,275 @@ module Disarm
|
|
|
114
151
|
translate_errors { _fold_case(text) }
|
|
115
152
|
end
|
|
116
153
|
|
|
117
|
-
# Whether the hostname looks like a mixed-script / confusable
|
|
118
|
-
#
|
|
154
|
+
# Whether the hostname looks like a mixed-script / confusable / bidi-reorder
|
|
155
|
+
# IDN spoof. Flags a mixed-script label, a Latin confusable, or a
|
|
156
|
+
# bidi-direction conflict (see #bidi_conflict?, the "BiDi Swap" precondition).
|
|
157
|
+
# A false result asserts nothing was *found*, not that the host is safe.
|
|
119
158
|
def suspicious_hostname?(host)
|
|
120
159
|
translate_errors { _suspicious_hostname?(host) }
|
|
121
160
|
end
|
|
122
161
|
|
|
162
|
+
# Apply a Unicode normalization form. `form:` is :nfc (default), :nfd,
|
|
163
|
+
# :nfkc, or :nfkd (a Symbol or String; case-insensitive).
|
|
164
|
+
def normalize(text, form: :nfc)
|
|
165
|
+
translate_errors { _normalize(text, form.to_s.upcase) }
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# Whether `text` is already in normalization `form:` (default :nfc).
|
|
169
|
+
def normalized?(text, form: :nfc)
|
|
170
|
+
translate_errors { _normalized?(text, form.to_s.upcase) }
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
# Fold every run of Unicode whitespace to a single ASCII space and trim
|
|
174
|
+
# leading/trailing whitespace (#433). Folds whitespace ONLY — the line
|
|
175
|
+
# controls (TAB/LF/VT/FF/CR), the information separators (U+001C–U+001F),
|
|
176
|
+
# NEL, the Zs/Zl/Zp spaces, and the blank-rendering set (Braille blank,
|
|
177
|
+
# Hangul fillers) each fold to a single space. It does NOT delete control or
|
|
178
|
+
# zero-width characters — use `strip_control_chars` / `strip_zero_width_chars`
|
|
179
|
+
# for that. Folding the line controls (not deleting) means "a\rb" → "a b".
|
|
180
|
+
def collapse_whitespace(text)
|
|
181
|
+
translate_errors { _collapse_whitespace(text) }
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
# Remove C0/C1 control characters (except tab and newline).
|
|
185
|
+
def strip_control_chars(text)
|
|
186
|
+
translate_errors { _strip_control_chars(text) }
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
# Remove zero-width characters (ZWSP, ZWNJ, ZWJ, word joiner).
|
|
190
|
+
def strip_zero_width_chars(text)
|
|
191
|
+
translate_errors { _strip_zero_width_chars(text) }
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Remove Unicode bidirectional control characters (a homoglyph/spoof vector).
|
|
195
|
+
def strip_bidi(text)
|
|
196
|
+
translate_errors { _strip_bidi(text) }
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
# Strip the Unicode Tags block (U+E0000-U+E007F) - the "ASCII smuggling"
|
|
200
|
+
# channel - preserving well-formed emoji subdivision flag sequences (#413).
|
|
201
|
+
def strip_tags(text)
|
|
202
|
+
translate_errors { _strip_tags(text) }
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
# Strip every variation selector (VS1-VS256) - the arbitrary-byte smuggling
|
|
206
|
+
# channel (#413).
|
|
207
|
+
def strip_variation_selectors(text)
|
|
208
|
+
translate_errors { _strip_variation_selectors(text) }
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
# Strip every Unicode noncharacter (U+FDD0-U+FDEF and U+xFFFE/U+xFFFF) (#413).
|
|
212
|
+
def strip_noncharacters(text)
|
|
213
|
+
translate_errors { _strip_noncharacters(text) }
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
# Strip every Private Use Area code point (BMP and planes 15/16) (#413).
|
|
217
|
+
def strip_pua(text)
|
|
218
|
+
translate_errors { _strip_pua(text) }
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
# Strip "zalgo" combining-mark stacking, keeping at most `max_marks:` (2)
|
|
222
|
+
# combining marks per base character.
|
|
223
|
+
def strip_zalgo(text, max_marks: 2)
|
|
224
|
+
translate_errors { _strip_zalgo(text, max_marks) }
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
# Whether `text` looks like zalgo: any base character carries more than
|
|
228
|
+
# `threshold:` (3) combining marks.
|
|
229
|
+
def zalgo?(text, threshold: 3)
|
|
230
|
+
translate_errors { _zalgo?(text, threshold) }
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
# Number of grapheme clusters (user-perceived characters). Counts an emoji
|
|
234
|
+
# or flag as one, unlike `String#length` (code points).
|
|
235
|
+
def grapheme_len(text)
|
|
236
|
+
translate_errors { _grapheme_len(text) }
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
# Split `text` into an array of grapheme-cluster strings.
|
|
240
|
+
def grapheme_split(text)
|
|
241
|
+
translate_errors { _grapheme_split(text) }
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Truncate `text` to at most `max_graphemes` grapheme clusters, never cutting
|
|
245
|
+
# through the middle of a cluster.
|
|
246
|
+
def grapheme_truncate(text, max_graphemes)
|
|
247
|
+
translate_errors { _grapheme_truncate(text, max_graphemes) }
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# Display width (terminal columns) of a single grapheme `cluster` by East
|
|
251
|
+
# Asian Width. Pass `ambiguous_wide: true` to treat ambiguous-width
|
|
252
|
+
# characters as 2 columns.
|
|
253
|
+
def grapheme_width(cluster, ambiguous_wide: false)
|
|
254
|
+
translate_errors { _grapheme_width(cluster, ambiguous_wide) }
|
|
255
|
+
end
|
|
256
|
+
|
|
257
|
+
# Total display width (terminal columns) of `text`.
|
|
258
|
+
def terminal_width(text, ambiguous_wide: false)
|
|
259
|
+
translate_errors { _terminal_width(text, ambiguous_wide) }
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# Turn arbitrary text into a safe filename. `platform:` is :universal
|
|
263
|
+
# (default), :windows, or :posix; `preserve_extension:` keeps the final
|
|
264
|
+
# extension when truncating to `max_length:`. Raises Disarm::InvalidArgument
|
|
265
|
+
# on an unknown platform.
|
|
266
|
+
def sanitize_filename(text, separator: "_", max_length: 255, platform: :universal,
|
|
267
|
+
lang: nil, preserve_extension: true)
|
|
268
|
+
translate_errors do
|
|
269
|
+
_sanitize_filename(text, separator.to_s, max_length, platform.to_s,
|
|
270
|
+
lang&.to_s, preserve_extension)
|
|
271
|
+
end
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
# Reverse-transliterate Latin back to a native script. `lang:` is :el (Greek),
|
|
275
|
+
# :ru (Russian), or :uk (Ukrainian) — a Symbol or String.
|
|
276
|
+
def reverse_transliterate(text, lang:)
|
|
277
|
+
translate_errors { _reverse_transliterate(text, lang.to_s) }
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
# Every character in `text` with no romanization, as an array of
|
|
281
|
+
# `{ char:, offset: }` hashes (byte offset), in order of appearance.
|
|
282
|
+
# `scheme:`/`lang:` mirror #transliterate.
|
|
283
|
+
def find_untranslatable(text, scheme: :default, lang: nil)
|
|
284
|
+
translate_errors do
|
|
285
|
+
_find_untranslatable(text, scheme.to_s, lang&.to_s)
|
|
286
|
+
.map { |ch, offset| { char: ch, offset: offset } }
|
|
287
|
+
end
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
# The Unicode scripts present in `text`, in first-appearance order
|
|
291
|
+
# (Common/Inherited excluded), as stable UCD identifiers (e.g. "Latin").
|
|
292
|
+
def detect_scripts(text)
|
|
293
|
+
translate_errors { _detect_scripts(text) }
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
# Whether `text` mixes characters from more than one script.
|
|
297
|
+
def mixed_script?(text)
|
|
298
|
+
translate_errors { _is_mixed_script?(text) }
|
|
299
|
+
end
|
|
300
|
+
|
|
301
|
+
# Whether `text` mixes strong left-to-right and strong right-to-left
|
|
302
|
+
# characters — the precondition for Bidi display-reordering (UAX #9) and the
|
|
303
|
+
# structural signal behind "BiDi Swap"-style spoofs. Fires on the real
|
|
304
|
+
# letters (no U+202x override). A false result is not a safety guarantee.
|
|
305
|
+
def bidi_conflict?(text)
|
|
306
|
+
translate_errors { _has_bidi_conflict?(text) }
|
|
307
|
+
end
|
|
308
|
+
|
|
309
|
+
# Explain how `lang: "auto"` detection resolves `text`: a hash with
|
|
310
|
+
# `:script`, `:chosen_lang` (both nil if undetected), `:reason`, and
|
|
311
|
+
# `:discriminators_hit`.
|
|
312
|
+
def inspect_auto_lang(text)
|
|
313
|
+
script, chosen_lang, reason, discriminators = translate_errors { _inspect_auto_lang(text) }
|
|
314
|
+
{ script: script, chosen_lang: chosen_lang, reason: reason,
|
|
315
|
+
discriminators_hit: discriminators }
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
# Curated metadata for one language `code` (e.g. "de"), as a hash with symbol
|
|
319
|
+
# keys: `:name`, `:script`, `:region`, and `:context` ("none"/"partial"/"full").
|
|
320
|
+
# Raises Disarm::InvalidArgument on an unknown code.
|
|
321
|
+
def lang_info(code)
|
|
322
|
+
translate_errors { _lang_info(code.to_s) }
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
# Curated metadata for one script `name` (e.g. "Coptic"), as a hash with symbol
|
|
326
|
+
# keys: `:name`, `:default_lang` (nil when none), `:example`, and
|
|
327
|
+
# `:context_aware`. Raises Disarm::InvalidArgument on an unknown script.
|
|
328
|
+
def script_info(name)
|
|
329
|
+
translate_errors { _script_info(name.to_s) }
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
# Every script disarm knows, as stable UCD script identifiers (includes
|
|
333
|
+
# "Common"/"Inherited"), sorted by name.
|
|
334
|
+
def list_scripts
|
|
335
|
+
translate_errors { _list_scripts }
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
# The language codes with context-aware transliteration support, sorted by code.
|
|
339
|
+
def list_context_langs
|
|
340
|
+
translate_errors { _list_context_langs }
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
# Whether any whitespace token carries out-of-place characters that disguise a
|
|
344
|
+
# real word — a cross-script homoglyph, leet, segmentation, a zero-width / bidi
|
|
345
|
+
# control, or zalgo. Reports a technical fact and leaves the malicious-or-not
|
|
346
|
+
# judgement to the caller. `lexicon` is a common-word collection (Array or Set)
|
|
347
|
+
# used only by the leet and segmentation branches; it defaults to an empty list
|
|
348
|
+
# when those branches are not needed. A bare String is rejected — pass an Array
|
|
349
|
+
# or any object responding to `:each`.
|
|
350
|
+
#
|
|
351
|
+
# For repeated calls over the same word list, build a Disarm::Lexicon once and
|
|
352
|
+
# pass it here: the native HashSet is then reused rather than rebuilt per call
|
|
353
|
+
# (HAI-SDLC 6.1).
|
|
354
|
+
def has_anomalies?(text, lexicon = [])
|
|
355
|
+
translate_errors do
|
|
356
|
+
if lexicon.is_a?(Disarm::Lexicon)
|
|
357
|
+
_has_anomalies_lex(text, lexicon)
|
|
358
|
+
else
|
|
359
|
+
_has_anomalies?(text, coerce_lexicon(lexicon))
|
|
360
|
+
end
|
|
361
|
+
end
|
|
362
|
+
end
|
|
363
|
+
|
|
364
|
+
# Full anomaly analysis: a hash with `:anomalous`, `:kinds` (in first-appearance
|
|
365
|
+
# order), `:findings` (each `{ kind:, token:, start:, end:, detail:, reason: }`,
|
|
366
|
+
# with byte offsets), and `:reason` (the first finding's reason, or nil).
|
|
367
|
+
# `lexicon` defaults to an empty list; a bare String is rejected. Pass a
|
|
368
|
+
# pre-built Disarm::Lexicon to reuse the native HashSet across calls (6.1).
|
|
369
|
+
def inspect_anomalies(text, lexicon = [])
|
|
370
|
+
anomalous, kinds, findings, reason =
|
|
371
|
+
translate_errors do
|
|
372
|
+
if lexicon.is_a?(Disarm::Lexicon)
|
|
373
|
+
_inspect_anomalies_lex(text, lexicon)
|
|
374
|
+
else
|
|
375
|
+
_inspect_anomalies(text, coerce_lexicon(lexicon))
|
|
376
|
+
end
|
|
377
|
+
end
|
|
378
|
+
{
|
|
379
|
+
anomalous: anomalous,
|
|
380
|
+
kinds: kinds,
|
|
381
|
+
findings: findings.map do |kind, token, start, finish, detail, fr|
|
|
382
|
+
{ kind: kind, token: token, start: start, end: finish, detail: detail, reason: fr }
|
|
383
|
+
end,
|
|
384
|
+
reason: reason,
|
|
385
|
+
}
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
# Build a reusable Disarm::Pipeline for a named policy `profile` (e.g.
|
|
389
|
+
# "search_index", "normalize_web_input"). The profile's steps are validated
|
|
390
|
+
# and assembled once at construction, so the returned handle can be reused
|
|
391
|
+
# across many `#process` calls without re-resolving the profile each time —
|
|
392
|
+
# the same reuse pattern as Disarm::Lexicon. Raises Disarm::InvalidArgument
|
|
393
|
+
# on an unknown profile name.
|
|
394
|
+
#
|
|
395
|
+
# pipe = Disarm.get_pipeline("search_index")
|
|
396
|
+
# pipe.process("Café") # => "cafe"
|
|
397
|
+
# pipe.process("Köln") # reuse the same handle
|
|
398
|
+
#
|
|
399
|
+
# Disarm::Pipeline#process is the Rust-defined instance method on the handle.
|
|
400
|
+
def get_pipeline(profile)
|
|
401
|
+
translate_errors { _get_pipeline(profile.to_s) }
|
|
402
|
+
end
|
|
403
|
+
|
|
123
404
|
private
|
|
124
405
|
|
|
406
|
+
# Coerce a lexicon argument to an Array of Strings for the native layer.
|
|
407
|
+
# Fast-path: an Array already containing only Strings is passed through as-is.
|
|
408
|
+
# Any other Enumerable (Set, etc.) is mapped to String. A bare String is rejected
|
|
409
|
+
# with ArgumentError — callers must wrap it in an Array: ["word"].
|
|
410
|
+
def coerce_lexicon(lexicon)
|
|
411
|
+
# An explicit nil is treated as an empty lexicon (parity with the `= []`
|
|
412
|
+
# default and the other bindings' null handling), not an error.
|
|
413
|
+
return [] if lexicon.nil?
|
|
414
|
+
|
|
415
|
+
raise ::ArgumentError, "lexicon must be an Array or Enumerable, not a String" \
|
|
416
|
+
if lexicon.is_a?(::String)
|
|
417
|
+
|
|
418
|
+
return lexicon if lexicon.is_a?(::Array) && lexicon.all?(::String)
|
|
419
|
+
|
|
420
|
+
lexicon.map(&:to_s)
|
|
421
|
+
end
|
|
422
|
+
|
|
125
423
|
# Run a native call, re-raising its built-in exception as the matching
|
|
126
424
|
# Disarm::Error subclass so callers can `rescue Disarm::Error` across the
|
|
127
425
|
# whole surface. The original backtrace is preserved (passed as the third
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: disarm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.10.0
|
|
4
|
+
version: 0.11.0
|
|
5
5
|
platform: x86_64-linux
|
|
6
6
|
authors:
|
|
7
7
|
- Richard Quinn
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-21 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake
|
|
@@ -52,6 +52,34 @@ dependencies:
|
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '3.0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: rubocop
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '1.65'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '1.65'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: rubocop-performance
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '1.21'
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '1.21'
|
|
55
83
|
description: |
|
|
56
84
|
Ruby bindings for the disarm Rust core: TR39 confusable folding, bidi/zalgo/
|
|
57
85
|
zero-width neutralization, Unicode normalization, standards-based
|