philiprehberger-string_kit 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +19 -0
- data/lib/philiprehberger/string_kit/version.rb +1 -1
- data/lib/philiprehberger/string_kit.rb +47 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4a290f7b9aa5a9534b5f0f55086b416671e860b89bd58df4dae1558e8451f489
|
|
4
|
+
data.tar.gz: e4162f06038b8da1ad5f3b67249e3c1776fe9213014c2287a305addc163a978e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: df68e8c923b8ab6693894e01068317abbb4254ab10063d5859c81dbc2e1c585160e7ec4fa94189329b25ad86ae8b6b6728fa153c990fd74537b22653796abd51
|
|
7
|
+
data.tar.gz: d41749d919b01bff185fe0bd555b65a0b7c2989e27fb9cb7d0b33cd301fdeabb1491467637eee0ac09b5eb9d29c0d4481d47c73f0a9fbadc7c2fa2e93cf2f84e
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,19 @@ and this gem adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.3.0] - 2026-04-25
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `StringKit.strip_zero_width` removes zero-width and invisible Unicode characters
|
|
14
|
+
- `StringKit.levenshtein` returns Levenshtein edit distance between two strings
|
|
15
|
+
- `StringKit.similarity` returns a 0.0–1.0 similarity score derived from Levenshtein distance
|
|
16
|
+
|
|
17
|
+
## [0.2.1] - 2026-04-15
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
- Align issue templates with guide: add required `gem-version` input to bug report and `alternatives` textarea plus code placeholder to feature request
|
|
21
|
+
- Pin `actions/checkout` to `v5` in CI workflow to match guide
|
|
22
|
+
|
|
10
23
|
## [0.2.0] - 2026-04-03
|
|
11
24
|
|
|
12
25
|
### Added
|
data/README.md
CHANGED
|
@@ -75,6 +75,22 @@ Philiprehberger::StringKit.indent("hello\nworld", 2) # => " hello\n
|
|
|
75
75
|
Philiprehberger::StringKit.dedent(" hello\n world") # => "hello\nworld"
|
|
76
76
|
```
|
|
77
77
|
|
|
78
|
+
### Zero-Width Characters
|
|
79
|
+
|
|
80
|
+
```ruby
|
|
81
|
+
require "philiprehberger/string_kit"
|
|
82
|
+
|
|
83
|
+
raw = "hello\u200Bworld"
|
|
84
|
+
Philiprehberger::StringKit.strip_zero_width(raw) # => "helloworld"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### String Similarity
|
|
88
|
+
|
|
89
|
+
```ruby
|
|
90
|
+
Philiprehberger::StringKit.levenshtein('kitten', 'sitting') # => 3
|
|
91
|
+
Philiprehberger::StringKit.similarity('kitten', 'sitting') # => ~0.571
|
|
92
|
+
```
|
|
93
|
+
|
|
78
94
|
## API
|
|
79
95
|
|
|
80
96
|
| Method | Description |
|
|
@@ -99,6 +115,9 @@ Philiprehberger::StringKit.dedent(" hello\n world") # => "hello\nworl
|
|
|
99
115
|
| `StringKit.squeeze(str)` | Remove consecutive duplicate characters |
|
|
100
116
|
| `StringKit.indent(str, n)` | Indent each line by n spaces |
|
|
101
117
|
| `StringKit.dedent(str)` | Remove common leading whitespace |
|
|
118
|
+
| `StringKit.strip_zero_width(str)` | Remove zero-width and invisible Unicode characters |
|
|
119
|
+
| `StringKit.levenshtein(a, b)` | Levenshtein edit distance between two strings |
|
|
120
|
+
| `StringKit.similarity(a, b)` | Similarity score from 0.0 to 1.0 derived from Levenshtein distance |
|
|
102
121
|
|
|
103
122
|
## Development
|
|
104
123
|
|
|
data/lib/philiprehberger/string_kit.rb
CHANGED
|
@@ -278,6 +278,53 @@ module Philiprehberger
|
|
|
278
278
|
str.swapcase
|
|
279
279
|
end
|
|
280
280
|
|
|
281
|
+
# Removes zero-width and invisible Unicode characters from `str`.
# Useful when ingesting content copied from web pages.
#
# The code points are spelled as `\u` escapes instead of literal characters so
# the pattern stays readable and survives editors or tools that drop invisible
# characters (a character class emptied that way, `/[]/`, does not compile).
#
# Removed code points:
#   U+200B ZERO WIDTH SPACE
#   U+200C ZERO WIDTH NON-JOINER
#   U+200D ZERO WIDTH JOINER
#   U+2060 WORD JOINER
#   U+FEFF ZERO WIDTH NO-BREAK SPACE (byte order mark)
# NOTE(review): confirm this set matches the intended list of characters.
#
# @param str [String]
# @return [String] a new string; `str` itself is not modified
def self.strip_zero_width(str)
  str.gsub(/[\u200B-\u200D\u2060\uFEFF]/, '')
end
|
|
289
|
+
|
|
290
|
+
# Levenshtein edit distance between `a` and `b`: the minimum number of
# single-character insertions, deletions and substitutions that turn `a`
# into `b`.
#
# Only two rows of the dynamic-programming table are kept. They are swapped
# after each character of `a` rather than copied, and the characters of `b`
# are decoded once up front instead of once per row.
#
# @param a [String]
# @param b [String]
# @return [Integer]
def self.levenshtein(a, b)
  return 0 if a == b
  return b.length if a.empty?
  return a.length if b.empty?

  target = b.chars
  width = target.length
  above = (0..width).to_a        # distances from the empty prefix of `a`
  row = Array.new(width + 1)     # scratch row, fully overwritten each pass

  a.each_char.with_index(1) do |source_char, row_number|
    row[0] = row_number
    target.each_with_index do |target_char, col|
      substitution = above[col] + (source_char == target_char ? 0 : 1)
      deletion = above[col + 1] + 1
      insertion = row[col] + 1
      row[col + 1] = [insertion, deletion, substitution].min
    end
    # The finished row becomes the reference row; the old one is reused.
    above, row = row, above
  end

  above[width]
end
|
|
313
|
+
|
|
314
|
+
# Normalised similarity of `a` and `b` in the range 0.0..1.0, based on the
# Levenshtein distance. Two empty strings count as identical (1.0);
# otherwise the score is `1 - distance / length_of_the_longer_string`.
#
# @param a [String]
# @param b [String]
# @return [Float]
def self.similarity(a, b)
  longest = a.length > b.length ? a.length : b.length
  return 1.0 if longest.zero?

  distance = levenshtein(a, b)
  1.0 - (distance / longest.to_f)
end
|
|
327
|
+
|
|
281
328
|
class << self
|
|
282
329
|
private
|
|
283
330
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: philiprehberger-string_kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Philip Rehberger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: String case conversion, slug generation, transliteration, padding, HTML
|
|
14
14
|
stripping, whitespace normalization, word counting, reading time estimation, excerpt
|