iriq 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/CLAUDE.md +121 -0
- data/Gemfile.lock +8 -2
- data/Makefile +56 -0
- data/README.md +112 -11
- data/iriq.gemspec +4 -3
- data/lib/iriq/cli.rb +6 -5
- data/lib/iriq/cluster.rb +24 -0
- data/lib/iriq/clusterer.rb +19 -44
- data/lib/iriq/corpus.rb +123 -69
- data/lib/iriq/parser.rb +1 -1
- data/lib/iriq/storage/json.rb +43 -0
- data/lib/iriq/storage/memory.rb +138 -0
- data/lib/iriq/storage/sqlite.rb +367 -0
- data/lib/iriq/storage.rb +35 -0
- data/lib/iriq/version.rb +1 -1
- data/lib/iriq.rb +1 -0
- metadata +23 -6
- data/script/benchmark.rb +0 -81
- data/script/memory.rb +0 -121
data/lib/iriq/corpus.rb
CHANGED
|
@@ -7,6 +7,10 @@ module Iriq
|
|
|
7
7
|
#
|
|
8
8
|
# The deterministic, single-IRI API (Iriq.normalize/explain) is unchanged —
|
|
9
9
|
# Corpus#normalize and Corpus#explain are the corpus-informed variants.
|
|
10
|
+
#
|
|
11
|
+
# State lives in a Storage backend (Memory by default; Json or Sqlite when
|
|
12
|
+
# opened against a file). The classification logic on top is identical
|
|
13
|
+
# regardless of where the counters live.
|
|
10
14
|
class Corpus
|
|
11
15
|
# Type-based: position is "mostly variable" (UUIDs/integers/etc.).
|
|
12
16
|
VARIABLE_DOMINANCE_THRESHOLD = 0.8
|
|
@@ -38,28 +42,53 @@ module Iriq
|
|
|
38
42
|
POPULAR_MIN_COUNT = 5
|
|
39
43
|
POPULAR_BASELINE_MULTIPLE = 3
|
|
40
44
|
|
|
41
|
-
attr_reader :
|
|
42
|
-
:fingerprint_counts, :position_stats
|
|
45
|
+
attr_reader :storage
|
|
43
46
|
|
|
44
47
|
# Build a corpus on top of a storage backend.
#
# classifier              - segment classifier kept for hint/shape derivation.
# max_values_per_position - only used to configure the default Memory
#                           backend; it is not applied to a backend passed
#                           in via +storage+.
# storage                 - backend instance. When nil (or false) a fresh
#                           Storage::Memory is created.
def initialize(classifier: SegmentClassifier::DEFAULT,
               max_values_per_position: PositionStats::DEFAULT_MAX_VALUES,
               storage: nil)
  @classifier = classifier
  @storage = storage
  @storage ||= Storage::Memory.new(
    classifier: classifier,
    max_values_per_position: max_values_per_position
  )
end
|
|
56
|
+
|
|
57
|
+
# Open a corpus against `path`. File extension picks the backend:
|
|
58
|
+
# `.db`/`.sqlite`/`.sqlite3` use SQLite (incremental writes); anything
|
|
59
|
+
# else uses JSON.
|
|
60
|
+
# path                    - file the backend is opened against.
# classifier              - passed to both the backend and the corpus.
# max_values_per_position - passed to the backend only.
#
# Returns a new corpus bound to the backend chosen by Storage.open.
def self.open(path, classifier: SegmentClassifier::DEFAULT,
              max_values_per_position: PositionStats::DEFAULT_MAX_VALUES)
  backend_options = {
    classifier: classifier,
    max_values_per_position: max_values_per_position
  }
  new(classifier: classifier, storage: Storage.open(path, **backend_options))
end
|
|
55
67
|
|
|
56
68
|
# Observe a single IRI. Returns an Observation.
|
|
57
69
|
def observe(input)
  iri = coerce(input)
  entries = SegmentHints.derive(iri.path_segments, @classifier)

  # Same entries rendered twice: without hints (raw shape) and with hints
  # (the fingerprint shape, also used for the cluster key).
  raw_shape, hinted_shape = [false, true].map do |with_hints|
    PathShape.new(classifier: @classifier, hints: with_hints).from_entries(entries)
  end

  # Assigned inside the block rather than taken from #transaction's return
  # value, so nothing is assumed about what a backend's #transaction returns.
  cluster = nil
  @storage.transaction do |store|
    store.increment_host(iri.host)
    store.increment_path_length(iri.path_segments.size)
    store.increment_raw_shape(raw_shape)
    store.increment_fingerprint(hinted_shape)

    # Each segment is recorded under the placeholder path built from the
    # segments that precede it; the first segment sits under "".
    entries.inject("") do |prefix, entry|
      store.observe_position(iri.host, prefix, entry[:value], entry[:type])
      "#{prefix}/#{placeholder(entry)}"
    end

    key, host, scheme, shape = Cluster.key_for(iri, classifier: @classifier, shape: hinted_shape)
    cluster = store.add_to_cluster(key, host, scheme, shape, iri)
  end

  Observation.new(corpus: self, identifier: iri, cluster: cluster)
end
|
|
65
94
|
|
|
@@ -89,55 +118,69 @@ module Iriq
|
|
|
89
118
|
end
|
|
90
119
|
end
|
|
91
120
|
|
|
121
|
+
# Counter views, each delegated unchanged to the storage backend.

def host_counts
  @storage.host_counts
end

def path_length_counts
  @storage.path_length_counts
end

def raw_shape_counts
  @storage.raw_shape_counts
end

def fingerprint_counts
  @storage.fingerprint_counts
end
|
|
125
|
+
|
|
126
|
+
# Iterates (host, prefix) → PositionStats over all observed positions.
|
|
127
|
+
# Used by inspection tooling; not part of the hot path.
|
|
128
|
+
# Hands the caller's block straight to the backend's iterator and returns
# whatever the backend returns.
def each_position_stats(&visitor)
  @storage.each_position_stats(&visitor)
end
|
|
131
|
+
|
|
92
132
|
# Clusters as reported by the storage backend.
def clusters
  @storage.clusters
end
|
|
95
135
|
|
|
96
136
|
# Number of clusters, as reported by the backend's #cluster_size.
def size
  @storage.cluster_size
end
|
|
99
139
|
|
|
100
140
|
# Stats for a given (host, prefix_shape) — useful for tests and
|
|
101
141
|
# debugging. Returns nil if nothing has been observed there.
|
|
102
142
|
# Looks up the backend's stats for the (host, prefix) pair and returns the
# backend's answer as-is.
def stats_for(host, prefix)
  @storage.position_stats(host, prefix)
end
|
|
105
145
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
146
|
+
# Persist the corpus.
|
|
147
|
+
#
|
|
148
|
+
# save() → flush the backend in place (JSON writes its file,
|
|
149
|
+
# SQLite is already on disk).
|
|
150
|
+
# save(same_path) → same as save() — idempotent for the backend's path.
|
|
151
|
+
# save(other_path) → export to other_path as JSON, regardless of the
|
|
152
|
+
# live backend.
|
|
153
|
+
# path - optional target. nil, or any spelling of the backend's own path,
#        flushes the backend in place; anything else is a JSON export.
#
# Paths are compared after File.expand_path, so "corpus.db", "./corpus.db"
# and the absolute form all count as the backend's path. A raw string
# comparison would send those spellings to the export branch, which renames
# a JSON file over the backend's live file.
def save(path = nil)
  backend_path = @storage.respond_to?(:path) ? @storage.path : nil

  in_place = path.nil? ||
             (!backend_path.nil? &&
              File.expand_path(path) == File.expand_path(backend_path))

  if in_place
    @storage.save
  else
    write_json_dump(path)
  end
end
|
|
111
161
|
|
|
112
|
-
def
|
|
113
|
-
@
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
raw = PathShape.new(classifier: @classifier, hints: false).from_entries(hinted_entries)
|
|
117
|
-
fp = PathShape.new(classifier: @classifier, hints: true).from_entries(hinted_entries)
|
|
118
|
-
@raw_shape_counts[raw] += 1
|
|
119
|
-
@fingerprint_counts[fp] += 1
|
|
162
|
+
# Closes the storage backend; returns the backend's result.
def close
  @storage.close
end
|
|
120
165
|
|
|
121
|
-
|
|
166
|
+
# Wrap many observations in a single backend transaction. For SQLite this
|
|
167
|
+
# turns thousands of fsyncs into one; for in-memory backends it's a
|
|
168
|
+
# no-op. Use when ingesting a batch.
|
|
169
|
+
# Passes the caller's block to the backend's #batch and returns its result.
def batch(&work)
  @storage.batch(&work)
end
|
|
123
172
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
stats = @position_stats[key] ||= PositionStats.new(max_values: @max_values_per_position)
|
|
129
|
-
stats.observe(entry[:value], entry[:type])
|
|
130
|
-
prefix = "#{prefix}/#{placeholder(entry)}"
|
|
131
|
-
end
|
|
173
|
+
private
|
|
174
|
+
|
|
175
|
+
# Returns +input+ untouched when it is already an Identifier; anything else
# goes through Parser.parse.
def coerce(input)
  return input if input.is_a?(Identifier)

  Parser.parse(input)
end
|
|
133
178
|
|
|
134
|
-
# Walks the IRI's segments and returns hint-derived entries enriched with
|
|
135
|
-
# the (host, prefix) PositionStats reference and a :classification symbol.
|
|
136
179
|
def annotate_segments(iri)
|
|
137
180
|
hinted = SegmentHints.derive(iri.path_segments, @classifier)
|
|
138
181
|
prefix = ""
|
|
139
182
|
hinted.map do |entry|
|
|
140
|
-
stats = @position_stats
|
|
183
|
+
stats = @storage.position_stats(iri.host, prefix)
|
|
141
184
|
out = entry.merge(
|
|
142
185
|
prefix: prefix,
|
|
143
186
|
classification: classify(entry, stats),
|
|
@@ -226,43 +269,54 @@ module Iriq
|
|
|
226
269
|
|
|
227
270
|
public
|
|
228
271
|
|
|
272
|
+
# --- Legacy dump/load (JSON shape) ------------------------------------
|
|
273
|
+
#
|
|
274
|
+
# The pre-Storage release exposed `Corpus#dump`, `Corpus#save(path)`, and
|
|
275
|
+
# `Corpus.load(path)` for JSON-backed persistence. Those names still work
|
|
276
|
+
# but are now thin wrappers around the appropriate Storage backend.
|
|
277
|
+
|
|
229
278
|
# Returns the result of #to_dump on the corpus's memory view.
def dump
  view = memory_view
  view.to_dump
end
|
|
240
281
|
|
|
241
|
-
def
|
|
282
|
+
# Builds a corpus over a fresh Memory backend populated from the dump hash.
#
# h          - dump hash with string keys; "max_values_per_position" is
#              optional and falls back to PositionStats::DEFAULT_MAX_VALUES.
# classifier - passed to both the backend and the corpus.
def self.from_dump(h, classifier: SegmentClassifier::DEFAULT)
  limit = h.fetch("max_values_per_position", PositionStats::DEFAULT_MAX_VALUES)
  backend = Storage::Memory.new(classifier: classifier, max_values_per_position: limit)
  new(classifier: classifier, storage: backend.tap { |b| b.load_dump!(h) })
end
|
|
288
|
+
|
|
289
|
+
# Legacy entry point: delegates to .open with the given path and classifier
# (no max_values_per_position is passed, so .open's default applies).
def self.load(path, classifier: SegmentClassifier::DEFAULT)
  open(path, classifier: classifier)
end
|
|
292
|
+
|
|
293
|
+
private
|
|
294
|
+
|
|
295
|
+
# Export the corpus to +path+ as JSON.
#
# The document is written to "<path>.tmp" first and then renamed over
# +path+, so a reader never sees a half-written file. If serialisation,
# the write, or the rename fails, the temp file is removed before the
# error is re-raised, so a failed export leaves no "<path>.tmp" behind.
#
# Returns the result of File.rename. Raises whatever the failing step raised.
def write_json_dump(path)
  tmp = "#{path}.tmp"
  File.write(tmp, JSON.generate(memory_view.to_dump))
  File.rename(tmp, path)
rescue StandardError
  File.delete(tmp) if File.exist?(tmp)
  raise
end
|
|
246
300
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
)
|
|
252
|
-
c.instance_variable_set(:@host_counts, Hash.new(0).merge(h["host_counts"]))
|
|
253
|
-
c.instance_variable_set(:@path_length_counts, Hash.new(0).merge(h["path_length_counts"].transform_keys(&:to_i)))
|
|
254
|
-
c.instance_variable_set(:@raw_shape_counts, Hash.new(0).merge(h["raw_shape_counts"]))
|
|
255
|
-
c.instance_variable_set(:@fingerprint_counts, Hash.new(0).merge(h["fingerprint_counts"]))
|
|
256
|
-
stats = h["position_stats"].each_with_object({}) do |(host, prefix, sdump), acc|
|
|
257
|
-
acc[[host, prefix]] = PositionStats.from_dump(sdump)
|
|
258
|
-
end
|
|
259
|
-
c.instance_variable_set(:@position_stats, stats)
|
|
260
|
-
c.instance_variable_set(:@clusterer, Clusterer.from_dump(h["clusterer"], classifier: classifier))
|
|
261
|
-
c
|
|
262
|
-
end
|
|
301
|
+
# Materialize a Memory snapshot of the current state — used by dump for
|
|
302
|
+
# backends that don't natively know how to emit the JSON shape.
|
|
303
|
+
# Returns the backend itself when it can already emit the dump shape
# (responds to #to_dump). Otherwise copies the backend's counters into a
# new Storage::Memory; position stats and cluster objects are carried
# over by reference.
def memory_view
  return @storage if @storage.respond_to?(:to_dump)

  snapshot = Storage::Memory.new(
    classifier: @classifier,
    max_values_per_position: @storage.max_values_per_position
  )

  # Counters become fresh zero-default hashes seeded from the backend.
  %i[host_counts path_length_counts raw_shape_counts fingerprint_counts].each do |counter|
    seeded = Hash.new(0).merge(@storage.public_send(counter))
    snapshot.instance_variable_set(:"@#{counter}", seeded)
  end

  positions = {}
  @storage.each_position_stats { |key, stats| positions[key] = stats }
  snapshot.instance_variable_set(:@position_stats, positions)

  by_key = {}
  @storage.clusters.each { |cluster| by_key[cluster.key] = cluster }
  snapshot.instance_variable_set(:@clusters, by_key)

  snapshot
end
|
|
267
321
|
end
|
|
268
322
|
end
|
data/lib/iriq/parser.rb
CHANGED
|
@@ -3,7 +3,7 @@ module Iriq
|
|
|
3
3
|
#
|
|
4
4
|
# Intentionally NOT a full RFC 3986 / 3987 / WHATWG URL implementation. We
|
|
5
5
|
# accept enough of the common shapes (URLs, scheme-less hosts, URNs, raw
|
|
6
|
-
# Unicode hosts and paths) to support normalization and clustering.
|
|
6
|
+
# Unicode hosts and paths) to support extraction, normalization, and clustering.
|
|
7
7
|
module Parser
|
|
8
8
|
SCHEME_RE = /\A([a-zA-Z][a-zA-Z0-9+\-.]*):/.freeze
|
|
9
9
|
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
require "json"
|
|
2
|
+
|
|
3
|
+
module Iriq
  module Storage
    # Json is the Memory backend bound to a file: state is read from the file
    # by Json.open / #load! and written back only by an explicit #save.
    # (#close is not overridden here, so closing does not write anything.)
    # Same JSON shape as the pre-Storage release, so files round-trip across
    # versions.
    class Json < Memory
      # Path this backend loads from / saves to by default; may be nil.
      attr_reader :path

      # path - default file for #save (optional).
      # opts - forwarded unchanged to Memory#initialize.
      def initialize(path: nil, **opts)
        super(**opts)
        @path = path
      end

      # Build a backend for +path+, loading the file when it exists and is
      # non-empty. A missing or zero-byte file yields an empty backend that
      # will be written on the first #save.
      def self.open(path, **opts)
        new(path: path, **opts).tap do |backend|
          backend.load!(path) if File.exist?(path) && File.size(path).positive?
        end
      end

      # Replace the in-memory state with the JSON document at +path+ and
      # remember +path+ as the default save target. An empty file is a
      # no-op. Returns self. Raises on unreadable files or invalid JSON.
      def load!(path)
        data = File.read(path)
        return self if data.empty?

        load_dump!(JSON.parse(data))
        @path = path
        self
      end

      # save writes atomically (tmp + rename). Defaults to the path passed at
      # open(); pass an explicit path to write elsewhere.
      #
      # Raises ArgumentError when neither an argument nor a default path is
      # available. If the write or rename fails, the "<target>.tmp" file is
      # removed before the error is re-raised, so a failed save leaves no
      # stray temp file behind.
      def save(path = nil)
        target = path || @path
        raise ArgumentError, "no path provided" unless target

        tmp = "#{target}.tmp"
        begin
          File.write(tmp, JSON.generate(to_dump))
          File.rename(tmp, target)
        rescue StandardError
          File.delete(tmp) if File.exist?(tmp)
          raise
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
module Iriq
  module Storage
    # Memory is the canonical backend — every other backend either wraps it
    # (Json) or implements the same surface against an external store (Sqlite).
    #
    # The contract is small enough to enumerate up top:
    #
    #   Writes
    #     increment_host(host)
    #     increment_path_length(length)
    #     increment_raw_shape(shape)
    #     increment_fingerprint(shape)
    #     observe_position(host, prefix, value, type)
    #     add_to_cluster(key, host, scheme, shape, identifier)
    #
    #   Reads
    #     host_counts / path_length_counts / raw_shape_counts / fingerprint_counts
    #     position_stats(host, prefix)
    #     each_position_stats { |(host, prefix), stats| ... }
    #     clusters / cluster_size
    #     max_values_per_position
    #     path                      # nil for unpathed backends
    #
    #   Lifecycle
    #     transaction { |storage| ... }  # backends may batch within
    #     batch { ... }                  # wrap many transactions
    #     flush                          # commit pending writes (no-op for Memory)
    #     save(path = nil)               # persist (no-op for Memory)
    #     close                          # release resources
    #
    #   JSON shape (optional; Corpus falls back to a Memory copy without it)
    #     to_dump / load_dump!(hash)
    class Memory
      attr_reader :max_values_per_position,
                  :host_counts, :path_length_counts,
                  :raw_shape_counts, :fingerprint_counts

      # Path of the underlying file, if any. Memory backends are unpathed;
      # Json/Sqlite override.
      def path; nil; end

      # classifier              - stored for the backend's use.
      # max_values_per_position - passed as max_values: to every
      #                           PositionStats this backend creates.
      def initialize(classifier: SegmentClassifier::DEFAULT,
                     max_values_per_position: PositionStats::DEFAULT_MAX_VALUES)
        @classifier = classifier
        @max_values_per_position = max_values_per_position
        @host_counts = Hash.new(0)
        @path_length_counts = Hash.new(0)
        @raw_shape_counts = Hash.new(0)
        @fingerprint_counts = Hash.new(0)
        @position_stats = {}   # [host, prefix] => PositionStats
        @clusters = {}         # key => Cluster
      end

      # Yields the backend itself; nothing to begin or commit in memory.
      def transaction
        yield self
      end

      # Runs the block as-is and returns its value.
      def batch
        yield
      end

      def flush; end
      def close; end

      # No-op for in-memory; subclasses override.
      def save(path = nil); end

      # --- Increments -------------------------------------------------------

      # A nil (or false) host is not counted.
      def increment_host(host)
        @host_counts[host] += 1 if host
      end

      def increment_path_length(length)
        @path_length_counts[length] += 1
      end

      def increment_raw_shape(shape)
        @raw_shape_counts[shape] += 1
      end

      def increment_fingerprint(shape)
        @fingerprint_counts[shape] += 1
      end

      # Records +value+/+type+ against the (host, prefix) slot, creating the
      # slot's PositionStats on first use.
      def observe_position(host, prefix, value, type)
        stats = @position_stats[[host, prefix]] ||= PositionStats.new(max_values: @max_values_per_position)
        stats.observe(value, type)
      end

      # Adds +identifier+ to the cluster stored under +key+, creating the
      # cluster on first use. Returns the cluster.
      def add_to_cluster(key, host, scheme, shape, identifier)
        cluster = @clusters[key] ||= Cluster.new(key: key, host: host, scheme: scheme, shape: shape)
        cluster.add(identifier)
        cluster
      end

      # --- Reads ------------------------------------------------------------

      # Returns the PositionStats for (host, prefix), or nil when that slot
      # has never been observed.
      def position_stats(host, prefix)
        @position_stats[[host, prefix]]
      end

      # Yields each ([host, prefix], stats) entry.
      def each_position_stats(&block)
        @position_stats.each(&block)
      end

      def clusters
        @clusters.values
      end

      def cluster_size
        @clusters.size
      end

      # --- Bulk load (used by JSON backend) --------------------------------

      # Replace all state with the contents of dump hash +h+ (string keys, as
      # produced by #to_dump and round-tripped through JSON). Every section is
      # optional: a missing or null section loads as empty, so a partial or
      # older document does not abort the load with a NoMethodError/TypeError.
      # Returns self.
      def load_dump!(h)
        @host_counts = Hash.new(0).merge(h["host_counts"] || {})
        # JSON object keys are strings; path lengths are integers in memory.
        @path_length_counts = Hash.new(0).merge((h["path_length_counts"] || {}).transform_keys(&:to_i))
        @raw_shape_counts = Hash.new(0).merge(h["raw_shape_counts"] || {})
        @fingerprint_counts = Hash.new(0).merge(h["fingerprint_counts"] || {})
        @max_values_per_position = h.fetch("max_values_per_position") { PositionStats::DEFAULT_MAX_VALUES }
        @position_stats = (h["position_stats"] || []).each_with_object({}) do |(host, prefix, sdump), acc|
          acc[[host, prefix]] = PositionStats.from_dump(sdump)
        end
        cluster_dumps = (h["clusterer"] || {})["clusters"] || {}
        @clusters = cluster_dumps.transform_values { |c| Cluster.from_dump(c) }
        self
      end

      # Plain-hash form of the state, ready for JSON.generate. The layout
      # (including the "clusterer" => { "clusters" => ... } nesting) is the
      # one #load_dump! reads back.
      def to_dump
        {
          "host_counts" => @host_counts,
          "path_length_counts" => @path_length_counts.transform_keys(&:to_s),
          "raw_shape_counts" => @raw_shape_counts,
          "fingerprint_counts" => @fingerprint_counts,
          "max_values_per_position" => @max_values_per_position,
          "position_stats" => @position_stats.map { |(host, prefix), s| [host, prefix, s.dump] },
          "clusterer" => {
            "clusters" => @clusters.transform_values(&:dump),
          },
        }
      end
    end
  end
end
|