iriq 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/CLAUDE.md +121 -0
- data/Gemfile.lock +8 -2
- data/Makefile +56 -0
- data/README.md +334 -39
- data/iriq.gemspec +4 -3
- data/lib/iriq/cli.rb +289 -100
- data/lib/iriq/cluster.rb +47 -0
- data/lib/iriq/clusterer.rb +29 -39
- data/lib/iriq/corpus.rb +322 -0
- data/lib/iriq/explanation.rb +6 -22
- data/lib/iriq/extractor.rb +125 -0
- data/lib/iriq/identifier.rb +11 -3
- data/lib/iriq/inflector.rb +145 -0
- data/lib/iriq/normalizer.rb +11 -8
- data/lib/iriq/observation.rb +25 -0
- data/lib/iriq/parser.rb +1 -1
- data/lib/iriq/path_shape.rb +27 -9
- data/lib/iriq/position_stats.rb +64 -0
- data/lib/iriq/segment_classifier.rb +31 -7
- data/lib/iriq/segment_hints.rb +32 -0
- data/lib/iriq/storage/json.rb +43 -0
- data/lib/iriq/storage/memory.rb +138 -0
- data/lib/iriq/storage/sqlite.rb +367 -0
- data/lib/iriq/storage.rb +35 -0
- data/lib/iriq/version.rb +1 -1
- data/lib/iriq.rb +11 -0
- metadata +29 -4
data/lib/iriq/normalizer.rb
CHANGED
|
@@ -2,24 +2,27 @@ module Iriq
|
|
|
2
2
|
# Produces a canonical, shape-aware string for an identifier.
|
|
3
3
|
#
|
|
4
4
|
# Normalizer.normalize("https://Foo.com:443/users/123")
|
|
5
|
-
# # => "https://foo.com/users/{
|
|
5
|
+
# # => "https://foo.com/users/{user_id}"
|
|
6
6
|
#
|
|
7
7
|
# The form is intended for grouping/diffing — it is not a round-trippable URL.
|
|
8
8
|
module Normalizer
|
|
9
9
|
module_function
|
|
10
10
|
|
|
11
|
-
def normalize(input, classifier: SegmentClassifier
|
|
11
|
+
def normalize(input, classifier: SegmentClassifier::DEFAULT, hints: true)
|
|
12
12
|
iri = input.is_a?(Identifier) ? input : Parser.parse(input)
|
|
13
|
-
normalize_identifier(iri, classifier: classifier)
|
|
13
|
+
normalize_identifier(iri, classifier: classifier, hints: hints)
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
def normalize_identifier(iri, classifier: SegmentClassifier
|
|
16
|
+
def normalize_identifier(iri, classifier: SegmentClassifier::DEFAULT, hints: true)
|
|
17
17
|
if iri.urn?
|
|
18
|
-
# urn:isbn:0451450523 -> urn:isbn:{integer_id}
|
|
19
18
|
if iri.scheme == "urn" && iri.nss && iri.nss.include?(":")
|
|
20
19
|
ns, value = iri.nss.split(":", 2)
|
|
21
|
-
|
|
22
|
-
shaped =
|
|
20
|
+
entry = SegmentHints.derive([ns, value], classifier).last
|
|
21
|
+
shaped = if entry[:variable]
|
|
22
|
+
"{#{(hints && entry[:hint]) || entry[:type]}}"
|
|
23
|
+
else
|
|
24
|
+
entry[:value]
|
|
25
|
+
end
|
|
23
26
|
"urn:#{ns}:#{shaped}"
|
|
24
27
|
else
|
|
25
28
|
iri.canonical
|
|
@@ -29,7 +32,7 @@ module Iriq
|
|
|
29
32
|
out << "#{iri.scheme}://" if iri.scheme
|
|
30
33
|
out << iri.host if iri.host
|
|
31
34
|
out << ":#{iri.port}" if iri.port
|
|
32
|
-
out << PathShape.new(classifier: classifier).for(iri.path_segments)
|
|
35
|
+
out << PathShape.new(classifier: classifier, hints: hints).for(iri.path_segments)
|
|
33
36
|
if iri.query_params && !iri.query_params.empty?
|
|
34
37
|
out << "?" + shape_query(iri.query_params, classifier)
|
|
35
38
|
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
module Iriq
  # Value object handed back by Corpus#observe. It only stores references;
  # the fingerprint and the explanation are computed on first access and
  # then cached on the instance.
  class Observation
    attr_reader :identifier, :cluster

    # corpus     - object that answers #explain and #normalize for an identifier.
    # identifier - the identifier this observation is about.
    # cluster    - the cluster associated with the identifier.
    def initialize(corpus:, identifier:, cluster:)
      @cluster = cluster
      @identifier = identifier
      @corpus = corpus
    end

    # Normalizer output for the identifier, using the Normalizer's default
    # options. Computed once per observation.
    def fingerprint
      return @fingerprint if @fingerprint

      @fingerprint = Normalizer.normalize_identifier(@identifier)
    end

    # The corpus' explanation of the identifier. Computed once per observation.
    def explanation
      return @explanation if @explanation

      @explanation = @corpus.explain(@identifier)
    end

    # Delegates to the corpus on every call; the result is not cached.
    def normalize
      @corpus.normalize(@identifier)
    end
  end
end
|
data/lib/iriq/parser.rb
CHANGED
|
@@ -3,7 +3,7 @@ module Iriq
|
|
|
3
3
|
#
|
|
4
4
|
# Intentionally NOT a full RFC 3986 / 3987 / WHATWG URL implementation. We
|
|
5
5
|
# accept enough of the common shapes (URLs, scheme-less hosts, URNs, raw
|
|
6
|
-
# Unicode hosts and paths) to support normalization and clustering.
|
|
6
|
+
# Unicode hosts and paths) to support extraction, normalization, and clustering.
|
|
7
7
|
module Parser
|
|
8
8
|
SCHEME_RE = /\A([a-zA-Z][a-zA-Z0-9+\-.]*):/.freeze
|
|
9
9
|
|
data/lib/iriq/path_shape.rb
CHANGED
|
@@ -1,27 +1,45 @@
|
|
|
1
1
|
module Iriq
|
|
2
2
|
# Converts a sequence of path segments into a route-shape string by
|
|
3
|
-
# replacing variable segments with `{
|
|
3
|
+
# replacing variable segments with `{hint}` placeholders, falling back to
|
|
4
|
+
# `{type}` when no hint is available.
|
|
4
5
|
#
|
|
5
6
|
# PathShape.for(["users", "123", "orders", "456"])
|
|
6
|
-
# # => "/users/{
|
|
7
|
+
# # => "/users/{user_id}/orders/{order_id}"
|
|
8
|
+
#
|
|
9
|
+
# Pass `hints: false` to use raw types instead:
|
|
10
|
+
#
|
|
11
|
+
# PathShape.for(["users", "123"], hints: false)
|
|
12
|
+
# # => "/users/{integer_id}"
|
|
7
13
|
class PathShape
|
|
8
|
-
def initialize(classifier: SegmentClassifier
|
|
14
|
+
def initialize(classifier: SegmentClassifier::DEFAULT, hints: true)
|
|
9
15
|
@classifier = classifier
|
|
16
|
+
@hints = hints
|
|
10
17
|
end
|
|
11
18
|
|
|
12
19
|
def for(segments)
|
|
13
20
|
return "/" if segments.nil? || segments.empty?
|
|
14
21
|
|
|
15
|
-
|
|
22
|
+
from_entries(SegmentHints.derive(segments, @classifier))
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Build a shape string from already-derived SegmentHints entries.
|
|
26
|
+
# Used by Corpus to avoid re-deriving entries per observation when it
|
|
27
|
+
# needs multiple shape variants (raw and hinted).
|
|
28
|
+
def from_entries(entries)
|
|
29
|
+
return "/" if entries.nil? || entries.empty?
|
|
30
|
+
|
|
31
|
+
"/" + entries.map { |e| shape_token(e) }.join("/")
|
|
16
32
|
end
|
|
17
33
|
|
|
18
|
-
def
|
|
19
|
-
|
|
20
|
-
|
|
34
|
+
def shape_token(entry)
|
|
35
|
+
return entry[:value] unless entry[:variable]
|
|
36
|
+
|
|
37
|
+
placeholder = @hints ? (entry[:hint] || entry[:type]) : entry[:type]
|
|
38
|
+
"{#{placeholder}}"
|
|
21
39
|
end
|
|
22
40
|
|
|
23
|
-
def self.for(segments, classifier: SegmentClassifier
|
|
24
|
-
new(classifier: classifier).for(segments)
|
|
41
|
+
def self.for(segments, classifier: SegmentClassifier::DEFAULT, hints: true)
|
|
42
|
+
new(classifier: classifier, hints: hints).for(segments)
|
|
25
43
|
end
|
|
26
44
|
end
|
|
27
45
|
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
module Iriq
  # Rolling frequency counts for a single (host, prefix-shape, position).
  # Value cardinality is capped so a high-entropy position (UUIDs, timestamps)
  # doesn't grow memory without bound — `total` keeps growing accurately, but
  # only the first `max_values` distinct values are tracked individually.
  class PositionStats
    DEFAULT_MAX_VALUES = 1_000

    attr_reader :value_counts, :type_counts, :total, :max_values

    # max_values - maximum number of distinct values tracked in value_counts.
    def initialize(max_values: DEFAULT_MAX_VALUES)
      @value_counts = Hash.new(0)
      @type_counts = Hash.new(0)
      @total = 0
      @max_values = max_values
    end

    # Records one observation of `value`, classified as `type`.
    # `total` and `type_counts` are always updated; `value_counts` is only
    # updated while there is room under the cap or the value is already tracked.
    def observe(value, type)
      @total += 1
      @type_counts[type] += 1
      @value_counts[value] += 1 if @value_counts.size < @max_values || @value_counts.key?(value)
    end

    # Number of distinct values currently tracked (never exceeds the cap
    # for stats built through #observe).
    def cardinality
      @value_counts.size
    end

    # Fraction of observations whose type was variable (i.e. classifier said
    # not :literal). `classifier` must respond to #variable?(type).
    # Returns 0.0 when nothing has been observed.
    def variable_fraction(classifier)
      return 0.0 if @total.zero?

      var = @type_counts.sum { |t, c| classifier.variable?(t) ? c : 0 }
      var.to_f / @total
    end

    # Fraction of all observations that carried exactly `value`.
    # Untracked values (never seen, or dropped by the cap) count as 0.
    def value_fraction(value)
      return 0.0 if @total.zero?

      @value_counts.fetch(value, 0).to_f / @total
    end

    # Serializable snapshot: string keys throughout, type names as strings.
    def dump
      {
        "value_counts" => @value_counts,
        "type_counts" => @type_counts.transform_keys(&:to_s),
        "total" => @total,
        "max_values" => @max_values,
      }
    end

    # Rebuilds stats from a #dump hash.
    #
    # Missing sections are tolerated so partial dumps still load: a missing
    # "max_values" falls back to DEFAULT_MAX_VALUES (a nil cap would make the
    # next #observe raise), missing count tables become empty, and a missing
    # "total" is recomputed from the type counts (#observe bumps exactly one
    # type count per observation, so the sum equals the total).
    def self.from_dump(h)
      stats = new(max_values: h["max_values"] || DEFAULT_MAX_VALUES)
      vc = Hash.new(0).merge(h["value_counts"] || {})
      tc = Hash.new(0).merge((h["type_counts"] || {}).transform_keys(&:to_sym))
      stats.instance_variable_set(:@total, h["total"] || tc.values.sum)
      stats.instance_variable_set(:@value_counts, vc)
      stats.instance_variable_set(:@type_counts, tc)
      stats
    end
  end
end
|
|
@@ -20,9 +20,34 @@ module Iriq
|
|
|
20
20
|
TS_SECONDS_RANGE = 1_000_000_000..9_999_999_999
|
|
21
21
|
TS_MILLIS_RANGE = 1_000_000_000_000..9_999_999_999_999
|
|
22
22
|
|
|
23
|
+
# Bounded memoization: classification of a given string is pure, so
|
|
24
|
+
# repeat segments (e.g. /users in countless paths) can be cached. Cap
|
|
25
|
+
# keeps the cache from unbounded growth when inputs are dominated by
|
|
26
|
+
# unique IDs.
|
|
27
|
+
CACHE_MAX = 10_000
|
|
28
|
+
|
|
29
|
+
def initialize
|
|
30
|
+
@cache = {}
|
|
31
|
+
end
|
|
32
|
+
|
|
23
33
|
def classify(segment)
|
|
24
34
|
return :literal if segment.nil? || segment.empty?
|
|
25
35
|
|
|
36
|
+
cached = @cache[segment]
|
|
37
|
+
return cached if cached
|
|
38
|
+
|
|
39
|
+
@cache.clear if @cache.size >= CACHE_MAX
|
|
40
|
+
@cache[segment] = compute_classification(segment)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Anything except :literal is considered variable for shape/explain.
|
|
44
|
+
def variable?(type)
|
|
45
|
+
type != :literal
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
def compute_classification(segment)
|
|
26
51
|
case segment
|
|
27
52
|
when UUID_RE then :uuid
|
|
28
53
|
when DATE_RE then :date
|
|
@@ -36,13 +61,6 @@ module Iriq
|
|
|
36
61
|
end
|
|
37
62
|
end
|
|
38
63
|
|
|
39
|
-
# Anything except :literal is considered variable for shape/explain.
|
|
40
|
-
def variable?(type)
|
|
41
|
-
type != :literal
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
private
|
|
45
|
-
|
|
46
64
|
def classify_integer(segment)
|
|
47
65
|
n = segment.to_i
|
|
48
66
|
return :timestamp if TS_MILLIS_RANGE.cover?(n)
|
|
@@ -50,5 +68,11 @@ module Iriq
|
|
|
50
68
|
|
|
51
69
|
:integer_id
|
|
52
70
|
end
|
|
71
|
+
|
|
72
|
+
public
|
|
73
|
+
|
|
74
|
+
# Shared singleton — preferred default for callers that don't bring
|
|
75
|
+
# their own classifier (saves a per-call allocation).
|
|
76
|
+
DEFAULT = new
|
|
53
77
|
end
|
|
54
78
|
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module Iriq
  # Walks a segment list and annotates each entry with the type, whether it's
  # variable, and a RESTful "hint" (e.g. `user_id`) when a variable segment
  # follows a literal one — `/users/123` ⇒ hint `user_id`.
  module SegmentHints
    module_function

    # Annotates every segment.
    #
    # segments   - array of segment strings.
    # classifier - object responding to #classify(segment) and #variable?(type).
    #
    # Returns one hash per segment, in order, with the keys
    # :value, :type, :variable and :hint (nil when no hint applies).
    def derive(segments, classifier)
      segments.each_with_index.map do |seg, i|
        type = classifier.classify(seg)
        variable = classifier.variable?(type)
        {
          value: seg,
          type: type,
          variable: variable,
          hint: hint_for(segments, i, type, variable, classifier),
        }
      end
    end

    # Hint for the segment at index `i`, or nil.
    #
    # A hint exists only for a variable segment whose immediate predecessor
    # is a non-empty literal. It is the singularized predecessor plus
    # "_uuid" for :uuid segments and "_id" for every other variable type.
    def hint_for(segments, i, type, variable, classifier)
      return nil unless variable && i > 0

      prev = segments[i - 1]
      # A blank parent has no name to derive from; without this guard the
      # hint would degrade to a bare "_id" / "_uuid".
      return nil if prev.nil? || prev.empty?
      return nil unless classifier.classify(prev) == :literal

      base = Inflector.singularize(prev)
      suffix = type == :uuid ? "_uuid" : "_id"
      "#{base}#{suffix}"
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
require "json"
|
|
2
|
+
|
|
3
|
+
module Iriq
  module Storage
    # Json wraps Memory with load-from-file at open and save-to-file at close.
    # Same JSON shape as the pre-Storage release, so files round-trip across
    # versions.
    class Json < Memory
      # Path of the backing file, or nil when none has been set yet.
      attr_reader :path

      # path - optional backing file; remaining options are passed to Memory.
      def initialize(path: nil, **opts)
        super(**opts)
        @path = path
      end

      # Builds a store bound to `path`, loading it first when the file
      # exists and is non-empty. A missing or empty file yields an empty store.
      def self.open(path, **opts)
        s = new(path: path, **opts)
        s.load!(path) if File.exist?(path) && File.size(path).positive?
        s
      end

      # Replaces the in-memory state with the JSON document at `path` and
      # remembers `path` as the default save target. An empty file is a no-op.
      # Read and JSON parse errors propagate to the caller.
      # Returns self.
      def load!(path)
        data = File.read(path)
        return self if data.empty?

        load_dump!(JSON.parse(data))
        @path = path
        self
      end

      # save writes atomically (tmp + rename). Defaults to the path passed at
      # open(); pass an explicit path to write elsewhere.
      #
      # Raises ArgumentError when neither an argument nor a stored path is
      # available. If writing or renaming fails, the temporary file is
      # removed before the error is re-raised, so a failed save does not
      # leave a stray "<target>.tmp" behind.
      def save(path = nil)
        target = path || @path
        raise ArgumentError, "no path provided" unless target

        # Serialize first so a serialization error never touches the disk.
        payload = JSON.generate(to_dump)
        tmp = "#{target}.tmp"
        begin
          File.write(tmp, payload)
          File.rename(tmp, target)
        rescue StandardError
          File.delete(tmp) if File.exist?(tmp)
          raise
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
module Iriq
  module Storage
    # Memory is the canonical backend — every other backend either wraps it
    # (Json) or implements the same surface against an external store (Sqlite).
    #
    # The contract is small enough to enumerate up top:
    #
    #   increment_host(host)
    #   increment_path_length(length)
    #   increment_raw_shape(shape)
    #   increment_fingerprint(shape)
    #   observe_position(host, prefix, value, type)
    #   add_to_cluster(key, host, scheme, shape, identifier)
    #
    #   host_counts / path_length_counts / raw_shape_counts / fingerprint_counts
    #   position_stats(host, prefix) / each_position_stats
    #   clusters / cluster_size
    #
    #   transaction { ... }   # backends may batch within
    #   batch { ... }         # plain yield for Memory
    #   flush                 # commit pending writes (no-op for Memory)
    #   close                 # release resources
    #   save(path = nil)      # no-op for Memory
    #
    #   load_dump!(hash) / to_dump   # bulk (de)serialization
    class Memory
      # Counter readers return the live hashes (default value 0), not copies.
      attr_reader :max_values_per_position,
                  :host_counts, :path_length_counts,
                  :raw_shape_counts, :fingerprint_counts

      # Path of the underlying file, if any. Memory backends are unpathed;
      # Json/Sqlite override.
      def path; nil; end

      # classifier              - stored on the instance; not used by Memory's own methods.
      # max_values_per_position - cap handed to each PositionStats this store creates.
      def initialize(classifier: SegmentClassifier::DEFAULT,
                     max_values_per_position: PositionStats::DEFAULT_MAX_VALUES)
        @classifier = classifier
        @max_values_per_position = max_values_per_position
        @host_counts = Hash.new(0)
        @path_length_counts = Hash.new(0)
        @raw_shape_counts = Hash.new(0)
        @fingerprint_counts = Hash.new(0)
        @position_stats = {}
        @clusters = {}
      end

      # Yields the store itself; nothing is batched or rolled back in memory.
      def transaction
        yield self
      end

      # Yields without arguments.
      def batch
        yield
      end

      def flush; end
      def close; end

      # No-op for in-memory; subclasses override.
      def save(path = nil); end

      # --- Increments -------------------------------------------------------

      # A nil host is ignored.
      def increment_host(host)
        @host_counts[host] += 1 if host
      end

      def increment_path_length(length)
        @path_length_counts[length] += 1
      end

      def increment_raw_shape(shape)
        @raw_shape_counts[shape] += 1
      end

      def increment_fingerprint(shape)
        @fingerprint_counts[shape] += 1
      end

      # Records value/type under the (host, prefix) key, creating the
      # PositionStats for that key on first use.
      def observe_position(host, prefix, value, type)
        stats = @position_stats[[host, prefix]] ||= PositionStats.new(max_values: @max_values_per_position)
        stats.observe(value, type)
      end

      # Adds identifier to the cluster stored under key, creating the cluster
      # from host/scheme/shape on first use. Returns the cluster.
      def add_to_cluster(key, host, scheme, shape, identifier)
        cluster = @clusters[key] ||= Cluster.new(key: key, host: host, scheme: scheme, shape: shape)
        cluster.add(identifier)
        cluster
      end

      # --- Reads ------------------------------------------------------------

      # PositionStats for (host, prefix), or nil when never observed.
      def position_stats(host, prefix)
        @position_stats[[host, prefix]]
      end

      # Yields ([host, prefix], stats) pairs.
      def each_position_stats(&block)
        @position_stats.each(&block)
      end

      def clusters
        @clusters.values
      end

      def cluster_size
        @clusters.size
      end

      # --- Bulk load (used by JSON backend) --------------------------------

      # Replaces the whole store with the contents of a #to_dump hash.
      #
      # Every section is decoded into locals first and only then assigned, so
      # an error while decoding (e.g. a malformed position_stats entry) leaves
      # the current state untouched instead of half-replaced. Sections that
      # are absent from the hash load as empty.
      # Returns self.
      def load_dump!(h)
        hosts = counter_from(h.fetch("host_counts", {}))
        lengths = counter_from(h.fetch("path_length_counts", {}).transform_keys(&:to_i))
        raw_shapes = counter_from(h.fetch("raw_shape_counts", {}))
        fingerprints = counter_from(h.fetch("fingerprint_counts", {}))
        max_values = h.fetch("max_values_per_position") { PositionStats::DEFAULT_MAX_VALUES }
        positions = h.fetch("position_stats", []).each_with_object({}) do |(host, prefix, sdump), acc|
          acc[[host, prefix]] = PositionStats.from_dump(sdump)
        end
        clusters = h.fetch("clusterer", {}).fetch("clusters", {})
                    .transform_values { |c| Cluster.from_dump(c) }

        @host_counts = hosts
        @path_length_counts = lengths
        @raw_shape_counts = raw_shapes
        @fingerprint_counts = fingerprints
        @max_values_per_position = max_values
        @position_stats = positions
        @clusters = clusters
        self
      end

      # Serializable snapshot of the store. Path lengths are stringified and
      # position stats are flattened to [host, prefix, stats_dump] triples.
      def to_dump
        {
          "host_counts" => @host_counts,
          "path_length_counts" => @path_length_counts.transform_keys(&:to_s),
          "raw_shape_counts" => @raw_shape_counts,
          "fingerprint_counts" => @fingerprint_counts,
          "max_values_per_position" => @max_values_per_position,
          "position_stats" => @position_stats.map { |(host, prefix), s| [host, prefix, s.dump] },
          "clusterer" => {
            "clusters" => @clusters.transform_values(&:dump),
          },
        }
      end

      private

      # Copies a plain hash into a counter hash whose missing keys read as 0.
      def counter_from(hash)
        Hash.new(0).merge(hash)
      end
    end
  end
end
|