uts58 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +23 -0
- data/README.md +9 -4
- data/lib/uts58/extractor.rb +3 -3
- data/lib/uts58.rb +1 -1
- data/uts58.gemspec +8 -2
- metadata +2 -2
- data/.github/workflows/ci.yml +0 -21
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 901c1246ffcea2bab7484aeacee3f1c0e38f30971badf145768d139695b8da33
|
|
4
|
+
data.tar.gz: c580d7efb9a2d569ced38309f250a6002bd0d08f4aa381b17c6d37a67b3e5bac
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9c760657f055c98806ba6c7c3b5c078c6abeeb152e96d235b2bb1c2d6c520b653d32db8eb54db1b9306ed9d4a32d3901e3e0b6f165db2e64861b902dd6d24dab
|
|
7
|
+
data.tar.gz: 6f3e5cf1ea1c88133c7d992061f01df4f0c6f230eff7be6538b1a620c27291c32920f3f4bf7fe4d6250a06b27b6590fd7e23972b993a6087de6535f3be0a27b2
|
data/LICENSE
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
|
|
2
|
+
Copyright (c) 2025, ICANN
|
|
3
|
+
|
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
|
6
|
+
|
|
7
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
8
|
+
list of conditions and the following disclaimer.
|
|
9
|
+
|
|
10
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
|
12
|
+
and/or other materials provided with the distribution.
|
|
13
|
+
|
|
14
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
15
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
16
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
17
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
18
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
19
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
20
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
21
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
22
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
23
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
# uts58
|
|
2
2
|
|
|
3
|
-
A Ruby implementation of [
|
|
3
|
+
A Ruby implementation of [UTS58](https://www.unicode.org/reports/tr58/),
|
|
4
4
|
the Unicode spec for finding links in running text. Given a chunk of text,
|
|
5
5
|
it returns the URLs and email addresses in it along with their character
|
|
6
6
|
offsets.
|
|
7
7
|
|
|
8
|
-
Both halves of
|
|
8
|
+
Both halves of UTS58 are covered: **web links** and **email addresses**.
|
|
9
9
|
The two are detected independently and can be combined.
|
|
10
10
|
|
|
11
|
-
Tested extensively on relevant OSes: [](https://github.com/arnt/uts58/actions/workflows/ruby.yml)
|
|
12
12
|
|
|
13
13
|
## Install
|
|
14
14
|
|
|
@@ -72,7 +72,7 @@ Uts58.extract_email_addresses("write to info@grå.org today")
|
|
|
72
72
|
# => ["info@grå.org"]
|
|
73
73
|
```
|
|
74
74
|
|
|
75
|
-
|
|
75
|
+
UTS58 allows Unicode local-parts, so `阿Q@例子.中国` and `उदाहरण@उदाहरण.भारत`
|
|
76
76
|
are recognised; the domain is IDN-decoded just like a URL host. A leading
|
|
77
77
|
`mailto:` in the input is folded into the matched span.
|
|
78
78
|
|
|
@@ -123,3 +123,8 @@ rdoc remains short and simple. Send mail to arnt@gulbrandsen.priv.no.
|
|
|
123
123
|
## License
|
|
124
124
|
|
|
125
125
|
BSD-2-Clause. See `LICENSE`.
|
|
126
|
+
|
|
127
|
+
FWIW, I wrote this as part of my work at ICANN and will maintain it as
|
|
128
|
+
part of the same work. (I resolve problems relating to Unicode in
|
|
129
|
+
domains, email addresses and similar, so more people, more
|
|
130
|
+
communities, can use the internet in the way they prefer.)
|
data/lib/uts58/extractor.rb
CHANGED
|
@@ -4,7 +4,7 @@ require 'public_suffix'
|
|
|
4
4
|
require_relative 'constants'
|
|
5
5
|
|
|
6
6
|
module Uts58
|
|
7
|
-
# Finds links in arbitrary text per
|
|
7
|
+
# Finds links in arbitrary text per UTS58. The public API mirrors
|
|
8
8
|
# Twitter::TwitterText::Extractor closely enough that twitter-text
|
|
9
9
|
# consumers (notably Mastodon) can easily swap one for the other.
|
|
10
10
|
#
|
|
@@ -145,7 +145,7 @@ module Uts58
|
|
|
145
145
|
# to render a <tt>:url</tt> entity. Both carry the IDN-decoded domain
|
|
146
146
|
# (A-labels become U-labels, as in #extract_urls_with_indices).
|
|
147
147
|
# +indices+ are codepoint offsets, +end+ exclusive; they cover a
|
|
148
|
-
# leading +mailto:+ in the input if there was one, per
|
|
148
|
+
# leading +mailto:+ in the input if there was one, per UTS58 5.2.
|
|
149
149
|
#
|
|
150
150
|
# A plain address such as "info@example.com" overlaps the bare domain
|
|
151
151
|
# "example.com" that #extract_urls_with_indices would find after the
|
|
@@ -184,7 +184,7 @@ module Uts58
|
|
|
184
184
|
end
|
|
185
185
|
local_start = at_pos - local.length
|
|
186
186
|
end_pos = at_pos + 1 + prefix[0].length
|
|
187
|
-
#
|
|
187
|
+
# UTS58 5.2 step 6: absorb a leading "mailto:" into the span.
|
|
188
188
|
if local_start >= 7 && text[(local_start - 7)...local_start].downcase == "mailto:"
|
|
189
189
|
local_start -= 7
|
|
190
190
|
end
|
data/lib/uts58.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
# Ruby implementation of {
|
|
3
|
+
# Ruby implementation of {UTS58}[https://www.unicode.org/reports/tr58/],
|
|
4
4
|
# the Unicode spec for finding links in running text.
|
|
5
5
|
#
|
|
6
6
|
# The two entry points below are module-level shortcuts around a single
|
data/uts58.gemspec
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |spec|
|
|
4
4
|
spec.name = "uts58"
|
|
5
|
-
spec.version = "0.2.1"
|
|
5
|
+
spec.version = "0.2.2"
|
|
6
6
|
spec.authors = ["Arnt Gulbrandsen"]
|
|
7
7
|
spec.email = ["arnt@gulbrandsen.priv.no"]
|
|
8
8
|
|
|
@@ -12,10 +12,16 @@ Gem::Specification.new do |spec|
|
|
|
12
12
|
spec.required_ruby_version = Gem::Requirement.new(">= 3.1")
|
|
13
13
|
spec.licenses = ["BSD-2-Clause"]
|
|
14
14
|
|
|
15
|
+
# The license lives once at the repository root; copy it in so it ships in
|
|
16
|
+
# the gem, since git ls-files below only reaches files under ruby/.
|
|
17
|
+
require "fileutils"
|
|
18
|
+
FileUtils.cp(File.expand_path("../LICENSE", __dir__),
|
|
19
|
+
File.expand_path("LICENSE", __dir__))
|
|
20
|
+
|
|
15
21
|
# Specify which files should be added to the gem when it is released.
|
|
16
22
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
17
23
|
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
|
18
|
-
`git ls-files -z 2>/dev/null`.split("\x0").reject { |f| f.match(%r{^(bin|test|spec|features|rfcs|tools)/}) }
|
|
24
|
+
`git ls-files -z 2>/dev/null`.split("\x0").reject { |f| f.match(%r{^(bin|test|spec|features|rfcs|tools)/}) } + ["LICENSE"]
|
|
19
25
|
end
|
|
20
26
|
spec.require_paths = ["lib"]
|
|
21
27
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: uts58
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Arnt Gulbrandsen
|
|
@@ -98,8 +98,8 @@ executables: []
|
|
|
98
98
|
extensions: []
|
|
99
99
|
extra_rdoc_files: []
|
|
100
100
|
files:
|
|
101
|
-
- ".github/workflows/ci.yml"
|
|
102
101
|
- Gemfile
|
|
102
|
+
- LICENSE
|
|
103
103
|
- README.md
|
|
104
104
|
- lib/uts58.rb
|
|
105
105
|
- lib/uts58/constants.rb
|
data/.github/workflows/ci.yml
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
name: CI
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
push:
|
|
5
|
-
branches: [main]
|
|
6
|
-
pull_request:
|
|
7
|
-
|
|
8
|
-
jobs:
|
|
9
|
-
test:
|
|
10
|
-
runs-on: ubuntu-latest
|
|
11
|
-
strategy:
|
|
12
|
-
fail-fast: false
|
|
13
|
-
matrix:
|
|
14
|
-
ruby: ["3.1", "3.2", "3.3", "3.4"]
|
|
15
|
-
steps:
|
|
16
|
-
- uses: actions/checkout@v4
|
|
17
|
-
- uses: ruby/setup-ruby@v1
|
|
18
|
-
with:
|
|
19
|
-
ruby-version: ${{ matrix.ruby }}
|
|
20
|
-
bundler-cache: true
|
|
21
|
-
- run: bundle exec rspec
|