iriq 0.1.0 → 0.30.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +87 -0
- data/CLAUDE.md +208 -0
- data/Gemfile.lock +8 -2
- data/Makefile +113 -0
- data/README.md +249 -270
- data/completions/_iriq +52 -0
- data/completions/iriq.bash +70 -0
- data/docs/ARCHITECTURE.md +223 -0
- data/docs/ROADMAP.md +190 -0
- data/iriq.gemspec +5 -4
- data/lib/iriq/cli.rb +402 -49
- data/lib/iriq/cluster.rb +304 -8
- data/lib/iriq/clusterer.rb +19 -44
- data/lib/iriq/corpus.rb +417 -81
- data/lib/iriq/cross_host_shape.rb +37 -0
- data/lib/iriq/event.rb +22 -0
- data/lib/iriq/evidence.rb +114 -0
- data/lib/iriq/explanation.rb +1 -1
- data/lib/iriq/normalizer.rb +71 -29
- data/lib/iriq/parser.rb +1 -1
- data/lib/iriq/path_shape.rb +30 -24
- data/lib/iriq/position.rb +75 -0
- data/lib/iriq/position_stats.rb +74 -8
- data/lib/iriq/recognizer.rb +54 -0
- data/lib/iriq/recognizer_proposal.rb +167 -0
- data/lib/iriq/recognizers/date.rb +53 -0
- data/lib/iriq/recognizers/integer.rb +37 -0
- data/lib/iriq/recognizers/uuid.rb +16 -0
- data/lib/iriq/reducer.rb +37 -0
- data/lib/iriq/registrable_domain.rb +56 -0
- data/lib/iriq/segment_classifier.rb +475 -23
- data/lib/iriq/segment_hints.rb +9 -0
- data/lib/iriq/shape.rb +106 -0
- data/lib/iriq/specificity.rb +35 -0
- data/lib/iriq/storage/json.rb +43 -0
- data/lib/iriq/storage/memory.rb +209 -0
- data/lib/iriq/storage/sqlite.rb +546 -0
- data/lib/iriq/storage.rb +35 -0
- data/lib/iriq/synthesized_recognizer.rb +56 -0
- data/lib/iriq/trace.rb +294 -0
- data/lib/iriq/version.rb +1 -1
- data/lib/iriq.rb +18 -0
- metadata +44 -8
- data/script/benchmark.rb +0 -81
- data/script/memory.rb +0 -121
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
require "set"
|
|
2
|
+
|
|
3
|
+
module Iriq
|
|
4
|
+
# A suggestion that a new Recognizer should be added to the system.
#
# Emitted by Corpus#propose_recognizers. NOT automatically activated —
# proposals carry enough evidence for a human to judge whether to add
# the Recognizer to the built-in set (or, later, to register it
# dynamically via a public Recognizer registry).
#
#   prefix            — the detected shape signature (e.g. "ghp_")
#   suggested_type    — Symbol name we'd register the Recognizer under
#                       if accepted (e.g. :ghp)
#   positions         — every Position where the proposal matched
#   hosts             — distinct hosts the proposal was seen at; a high
#                       count is strong evidence the pattern isn't
#                       host-local
#   coverage          — fraction of sampled observations at affected
#                       Positions matching the proposal pattern
#   confidence        — the explicit value passed to the constructor, or
#                       (when omitted) coverage plus a per-extra-host
#                       boost, capped at 1.0
#   observation_count — total matching observations across positions
#   sample_values     — example matches, for the human reviewer
#   strategy          — the ProposalStrategy that emitted this record
class RecognizerProposal
  attr_reader :prefix, :suggested_type, :positions, :hosts,
              :coverage, :confidence, :observation_count,
              :sample_values, :strategy

  # All collection arguments are snapshotted (shallow copy, then frozen)
  # so the proposal is immutable without freezing objects the caller
  # still owns.
  #
  # confidence: pass a value to override the computed score; nil (the
  # default) derives it from coverage and the number of hosts.
  def initialize(prefix:, suggested_type:, positions:, hosts:,
                 coverage:, observation_count:, sample_values:,
                 strategy:, confidence: nil)
    @prefix = prefix
    @suggested_type = suggested_type
    @positions = positions.dup.freeze
    # Set.new copies an existing Set and converts any other enumerable.
    @hosts = Set.new(hosts).freeze
    @coverage = coverage
    @observation_count = observation_count
    @sample_values = sample_values.dup.freeze
    @strategy = strategy
    @confidence = confidence.nil? ? compute_confidence : confidence
  end

  # Plain-Hash view of the proposal. Positions are converted with #to_h
  # and hosts are emitted as a sorted Array so the output is stable.
  def to_h
    {
      prefix: @prefix,
      suggested_type: @suggested_type,
      positions: @positions.map(&:to_h),
      hosts: @hosts.to_a.sort,
      coverage: @coverage,
      confidence: @confidence,
      observation_count: @observation_count,
      sample_values: @sample_values,
      strategy: @strategy,
    }
  end

  private

  # Confidence = coverage + linear cross-host boost, capped at 1.0.
  # Single-host proposals get their raw coverage as confidence (no
  # boost). Each additional host adds CROSS_HOST_BOOST_PER_HOST to
  # the score, so both signals compose until the cap is reached.
  def compute_confidence
    # Clamp at zero so an empty host set never yields a negative boost.
    extra_hosts = [@hosts.size - 1, 0].max
    score = @coverage + (extra_hosts * ProposalStrategy::CROSS_HOST_BOOST_PER_HOST)
    score > 1.0 ? 1.0 : score
  end
end
|
|
69
|
+
|
|
70
|
+
# Pluggable proposal-detection strategies. Each strategy.propose(storage, **opts)
# returns an array of RecognizerProposal. Adding a new detection rule =
# add a class with #propose; register it via DEFAULTS.
module ProposalStrategy
  # Default minimum total matching observations across positions before
  # we'll emit a proposal. Below this the signal is too noisy.
  DEFAULT_MIN_OBSERVATIONS = 20
  # Fraction of sampled observations at affected Positions that must
  # match the proposal pattern.
  DEFAULT_MIN_COVERAGE = 0.7
  # Minimum number of distinct hosts the proposal must appear at. For
  # single-host corpora this defaults to 1; bumping to 2+ promotes
  # cross-host patterns over host-local ones.
  DEFAULT_MIN_HOSTS = 1
  # Confidence boost added per additional host beyond the first. A
  # pattern seen on 10 hosts gets +0.45, which reaches the 1.0 cap when
  # combined with any reasonable coverage; single-host patterns get
  # no boost (their coverage IS their confidence).
  CROSS_HOST_BOOST_PER_HOST = 0.05

  # Detects `<prefix>_<alphanumeric>` values at positions whose dominant
  # type is :slug or :opaque_id — e.g. `ghp_…` or `cus_…` style tokens.
  # The pattern is anchored and allows exactly one underscore: a
  # lowercase-letter prefix followed by an alphanumeric suffix. Values
  # with further separators (`sk_test_…`, `my-cool-post`,
  # `red_team_member`) therefore do not match at all, which keeps real
  # slugs from triggering false proposals.
  class PrefixUnderscoreId
    PATTERN = /\A([a-z]+)_([A-Za-z0-9]+)\z/.freeze
    NAME = :prefix_underscore_id
    # Number of example values attached to each proposal.
    MAX_SAMPLE_VALUES = 5

    # Scan every position in +storage+ and return the proposals that
    # clear all three thresholds, ordered by descending confidence and
    # then by prefix.
    #
    # storage must respond to #each_position_stats, yielding
    # (position, stats) pairs where position responds to #host and stats
    # responds to #type_counts, #value_counts and #total.
    def propose(storage,
                min_observations: DEFAULT_MIN_OBSERVATIONS,
                min_coverage: DEFAULT_MIN_COVERAGE,
                min_hosts: DEFAULT_MIN_HOSTS)
      proposals = accumulate(storage).filter_map do |prefix, acc|
        build_proposal(prefix, acc,
                       min_observations: min_observations,
                       min_coverage: min_coverage,
                       min_hosts: min_hosts)
      end
      proposals.sort_by { |p| [-p.confidence, p.prefix] }
    end

    private

    # Group matching values by prefix ("ghp_", "cus_", …) and tally the
    # evidence for each one.
    def accumulate(storage)
      per_prefix = Hash.new { |h, k| h[k] = empty_accumulator }

      storage.each_position_stats do |position, stats|
        next unless slug_or_opaque?(stats)

        stats.value_counts.each do |value, count|
          match = PATTERN.match(value)
          next unless match

          acc = per_prefix["#{match[1]}_"]
          acc[:matching_count] += count
          # Set#add? returns nil when the position is already recorded,
          # so each position's total is counted exactly once per prefix.
          acc[:position_observations] += stats.total if acc[:positions].add?(position)
          acc[:hosts] << position.host
          # Collected in a Set: the same value seen at several positions
          # must not appear twice among the samples.
          acc[:matches] << value
        end
      end

      per_prefix
    end

    # Turn one accumulator into a RecognizerProposal, or nil when it
    # fails any threshold.
    def build_proposal(prefix, acc, min_observations:, min_coverage:, min_hosts:)
      return nil if acc[:matching_count] < min_observations
      return nil if acc[:hosts].size < min_hosts

      observed = acc[:position_observations]
      # A zero total would make coverage Infinity/NaN and slip past the
      # coverage threshold below.
      return nil unless observed.positive?

      coverage = acc[:matching_count].to_f / observed
      return nil if coverage < min_coverage

      RecognizerProposal.new(
        prefix: prefix,
        suggested_type: prefix.chomp("_").to_sym,
        positions: acc[:positions].to_a,
        hosts: acc[:hosts],
        coverage: coverage,
        observation_count: acc[:matching_count],
        # Smallest N in sorted order so Ruby and Go produce identical
        # samples regardless of underlying Hash / map iteration order.
        # The samples are illustrative for humans; alphabetical is fine.
        sample_values: acc[:matches].min(MAX_SAMPLE_VALUES),
        strategy: NAME,
      )
    end

    def empty_accumulator
      {
        positions: Set.new,
        hosts: Set.new,
        matching_count: 0,
        position_observations: 0,
        matches: Set.new,
      }
    end

    # True when the most frequently observed type at the position is
    # :slug or :opaque_id.
    def slug_or_opaque?(stats)
      dom = stats.type_counts.max_by { |_, c| c }&.first
      dom == :slug || dom == :opaque_id
    end
  end

  DEFAULTS = [PrefixUnderscoreId.new].freeze
end
|
|
167
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
module Iriq
|
|
2
|
+
module Recognizers
|
|
3
|
+
# Recognizes date-shaped segments: ISO 8601 (YYYY-MM-DD), the slash
# form (YYYY/MM/DD), and US-style (M/D/YYYY). Compact YYYYMMDD lives on
# the Integer recognizer — it sees the digits-only input first.
#
# Conservative: DD/MM/YYYY is intentionally NOT recognized — from a
# bare segment we can't tell it apart from MM/DD/YYYY.
class Date < Recognizer
  ISO_PATTERN   = /\A\d{4}-\d{2}-\d{2}\z/.freeze
  SLASH_PATTERN = %r{\A\d{4}/\d{2}/\d{2}\z}.freeze
  US_PATTERN    = %r{\A(\d{1,2})/(\d{1,2})/(\d{4})\z}.freeze

  # Returns the :date classification Hash when the segment has one of
  # the three supported shapes, nil otherwise. This is a shape check
  # only: field ranges are not validated here (see .canonical).
  def try(segment)
    # Cheap separator probe before running any regex.
    return nil unless segment.include?("-") || segment.include?("/")

    date_shaped = ISO_PATTERN.match?(segment) ||
                  SLASH_PATTERN.match?(segment) ||
                  US_PATTERN.match?(segment)
    return nil unless date_shaped

    { type: :date, confidence: 1.0, specificity: Specificity::STRUCTURED }
  end

  # Canonicalize a recognized date to ISO 8601 (YYYY-MM-DD). nil for
  # non-date / implausible-date values. Day-of-month validity (Feb 30,
  # Apr 31) deliberately not checked — out of scope for a heuristic.
  def self.canonical(value)
    return nil if value.nil?

    case value
    when ISO_PATTERN
      # Already canonical; hand the input back untouched.
      return value if plausible?(value[0, 4], value[5, 2], value[8, 2])
    when SLASH_PATTERN
      return value.tr("/", "-") if plausible?(value[0, 4], value[5, 2], value[8, 2])
    when US_PATTERN
      captures = ::Regexp.last_match
      month = captures[1].rjust(2, "0")
      day = captures[2].rjust(2, "0")
      year = captures[3]
      return "#{year}-#{month}-#{day}" if plausible?(year, month, day)
    end

    nil
  end

  # Range check on the three fields: year 1900–2100, month 1–12,
  # day 1–31. Arguments are converted with #to_i.
  def self.plausible?(y, m, d)
    (1900..2100).cover?(y.to_i) &&
      (1..12).cover?(m.to_i) &&
      (1..31).cover?(d.to_i)
  end
end

DATE = Date.new
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
module Iriq
|
|
2
|
+
module Recognizers
|
|
3
|
+
# Base-10 integer recognizer. Digit-only segments share one lexical
# shape, so the most specific type wins: :timestamp for values in the
# UNIX seconds / millis ranges, :date for plausible compact YYYYMMDD
# values, and :integer for everything else.
class Integer < Recognizer
  PATTERN = /\A\d+\z/.freeze
  COMPACT_DATE_PATTERN = /\A\d{8}\z/.freeze
  TS_SECONDS_RANGE = 1_000_000_000..9_999_999_999
  TS_MILLIS_RANGE = 1_000_000_000_000..9_999_999_999_999

  # Returns a classification Hash for an all-digit segment, or nil when
  # the segment is empty or contains anything other than digits.
  def try(segment)
    # Byte-level probe of the first character (ASCII '0'..'9') rejects
    # most non-numeric segments before the regex runs.
    lead = segment.getbyte(0)
    return nil if lead.nil? || lead < 0x30 || lead > 0x39
    return nil unless PATTERN.match?(segment)

    number = segment.to_i
    if TS_SECONDS_RANGE.cover?(number) || TS_MILLIS_RANGE.cover?(number)
      { type: :timestamp, confidence: 1.0, specificity: Specificity::BOUNDED }
    elsif compact_date?(segment)
      { type: :date, confidence: 1.0, specificity: Specificity::STRUCTURED }
    else
      { type: :integer, confidence: 1.0, specificity: Specificity::TYPED }
    end
  end

  private

  # True for an 8-digit segment whose YYYY / MM / DD fields fall within
  # 1900–2100, 1–12 and 1–31 respectively.
  def compact_date?(segment)
    return false unless COMPACT_DATE_PATTERN.match?(segment)

    year = segment[0, 4].to_i
    month = segment[4, 2].to_i
    day = segment[6, 2].to_i
    (1900..2100).cover?(year) && (1..12).cover?(month) && (1..31).cover?(day)
  end
end

INTEGER = Integer.new
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
module Iriq
|
|
2
|
+
module Recognizers
|
|
3
|
+
# RFC 4122 UUID in its 8-4-4-4-12 hexadecimal text form. Shape-only —
# the version and variant bits are not validated.
class Uuid < Recognizer
  PATTERN = /\A\h{8}-\h{4}-\h{4}-\h{4}-\h{12}\z/.freeze

  # Returns the :uuid classification Hash for a UUID-shaped segment,
  # nil otherwise.
  def try(segment)
    # Length and separator probes are cheap rejections ahead of the regex.
    return nil if segment.size != 36
    return nil unless segment.include?("-")
    return nil unless PATTERN.match?(segment)

    { type: :uuid, confidence: 1.0, specificity: Specificity::SEMANTIC }
  end
end

UUID = Uuid.new
|
|
15
|
+
end
|
|
16
|
+
end
|
data/lib/iriq/reducer.rb
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
module Iriq
|
|
2
|
+
# Reducers consume the Event stream emitted by Corpus#observe and update
# the storage backend's materialized views. Each Reducer is a callable
# that takes (event, storage) and applies the appropriate storage
# operation. Events whose class has no registered Reducers are no-ops:
# the lookup falls back to an empty list.
#
# Adding a new metric is: define a new Event subtype, write a Reducer that
# handles it, register it in DEFAULTS — no other module changes.
module Reducer
  # Registry shape: { event_class => [lambda(event, storage) -> result] }.
  # A lambda returns whatever the underlying storage call returns, which
  # lets callers (Corpus#observe) pick up the cluster they need to return.
  DEFAULTS = {
    Event::HostSeen => [
      ->(event, storage) { storage.increment_host(event.host) },
    ],
    Event::PathLengthSeen => [
      ->(event, storage) { storage.increment_path_length(event.length) },
    ],
    Event::RawShapeSeen => [
      ->(event, storage) { storage.increment_raw_shape(event.shape) },
    ],
    Event::FingerprintSeen => [
      ->(event, storage) { storage.increment_fingerprint(event.shape) },
    ],
    Event::PositionSeen => [
      ->(event, storage) { storage.observe_position(event.position, event.value, event.type) },
    ],
    Event::ClusterAddition => [
      lambda do |event, storage|
        storage.add_to_cluster(event.key, event.host, event.scheme, event.shape, event.identifier)
      end,
    ],
  }.freeze

  module_function

  # Run every Reducer registered for the event's exact class, in
  # registration order, and return the last non-nil result (nil when
  # there is none). Corpus#observe uses that value to pick up the
  # Cluster created/updated by Event::ClusterAddition.
  def apply(event, storage, reducers: DEFAULTS)
    handlers = reducers.fetch(event.class, [])
    handlers.reduce(nil) do |latest, handler|
      outcome = handler.call(event, storage)
      outcome.nil? ? latest : outcome
    end
  end
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
module Iriq
|
|
2
|
+
# Heuristic registrable-domain extractor. Strips subdomains so that
# api.foo.com and app.foo.com both resolve to foo.com.
#
# Uses an inline allowlist of common two-label public suffixes
# (.co.uk, .com.au, .gov.uk, etc.) — enough for the bulk of real-world
# traffic without the ~3 MB cost of bundling the full Public Suffix
# List. Niche multi-label TLDs (.priv.no, .tas.gov.au, etc.) will be
# over-stripped; install the `public_suffix` gem and wire it in if
# accuracy on those matters for your workload.
module RegistrableDomain
  # rubocop:disable Layout/LineLength
  TWO_LABEL_SUFFIXES = %w[
    co.uk org.uk gov.uk ac.uk net.uk me.uk ltd.uk plc.uk sch.uk
    co.jp ac.jp or.jp ne.jp go.jp gr.jp ed.jp lg.jp
    com.au net.au org.au edu.au gov.au asn.au id.au
    co.nz net.nz org.nz govt.nz ac.nz school.nz
    com.br net.br org.br gov.br edu.br
    com.cn net.cn org.cn gov.cn edu.cn ac.cn
    co.za net.za org.za gov.za ac.za
    co.kr ne.kr or.kr re.kr go.kr ac.kr
    co.in net.in org.in gov.in ac.in
    co.il net.il org.il gov.il ac.il muni.il
    com.mx net.mx org.mx gob.mx edu.mx
    com.ar net.ar org.ar gov.ar
    com.hk net.hk org.hk gov.hk edu.hk
    com.tw net.tw org.tw gov.tw edu.tw
    com.sg net.sg org.sg gov.sg edu.sg per.sg
    com.tr net.tr org.tr gov.tr edu.tr k12.tr
  ].to_set.freeze
  # rubocop:enable Layout/LineLength

  IP_V4_RE = /\A\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\z/.freeze

  module_function

  # Given an authority (hostname, no port), return the registrable
  # domain: the last three labels when the last two form a known
  # public suffix (`foo.co.uk`), otherwise the last two (`foo.com`).
  #
  # Returned unchanged: nil, the empty string, IPv4 literals, anything
  # containing ":" (an IPv6 literal, since the input carries no port),
  # and hosts with one or two labels (`localhost`, `foo.com`, `co.uk`).
  #
  # The suffix lookup is case-insensitive; the returned labels keep the
  # caller's original casing.
  def for(host)
    return host if host.nil? || host.empty?
    return host if IP_V4_RE.match?(host)
    # IPv6 literals may embed dots (`::ffff:1.2.3.4`); splitting them on
    # "." would return a meaningless fragment.
    return host if host.include?(":")

    labels = host.split(".")
    return host if labels.size <= 2

    suffix_key = labels.last(2).join(".").downcase
    keep = TWO_LABEL_SUFFIXES.include?(suffix_key) ? 3 : 2
    labels.last(keep).join(".")
  end
end
|
|
56
|
+
end
|