iriq 0.2.0 → 0.30.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/iriq/trace.rb ADDED
@@ -0,0 +1,294 @@
1
module Iriq
  # Produces an annotated trace explaining how an identifier got
  # normalized — segment by segment, with notes for each non-obvious
  # transformation (currency upcase, IP umbrella, hint suppression,
  # canonical date, param-name lift, etc.).
  #
  #   Trace.for("https://shop.com/pricing/usd?currency=eur")
  #   # => {
  #   #   input:      "...",
  #   #   normalized: "https://shop.com/pricing/USD?currency=EUR",
  #   #   scheme: "https", host: "shop.com",
  #   #   path:  [...per-segment rows...],
  #   #   query: [...per-param rows...],
  #   # }
  #
  # Each row is `{ value, type, output, notes }` for path entries and
  # `{ name, value, type, output, notes }` for query entries. The string
  # notes are rendered from structured Iriq::Evidence::Record values;
  # callers that want the structured form can use Trace.evidence_for.
  module Trace
    module_function

    HINT_NOTE_TEMPLATE = "semantic type — surfaced as {%s}, not {%s}".freeze

    # Types whose value is rewritten to a canonical spelling instead of
    # being replaced by a placeholder. Maps the type to the policy rule
    # recorded in the Evidence payload and the label used in the note.
    CANONICAL_RULES = {
      date: { rule: :canonical_date, label: "canonical date" },
      currency: { rule: :canonical_currency, label: "currency upcase" },
    }.freeze

    # Types reported under the `ip` umbrella.
    IP_TYPES = %i[ipv4 ipv6].freeze

    # Render-ready Trace output. The public format consumers depend on.
    #
    # input      - an Identifier, or anything Parser.parse accepts.
    # classifier - classifier used for every segment and query value.
    # hints      - when false, neighbor-derived hints are not surfaced.
    #
    # Returns a Hash with :input, :normalized, :scheme, :host and :path;
    # :port is present only when the identifier has one, and :query only
    # for non-URN identifiers with at least one query param.
    def for(input, classifier: SegmentClassifier::DEFAULT, hints: true)
      iri = coerce(input)
      normalized = Normalizer.normalize_identifier(iri, classifier: classifier, hints: hints)

      out = {
        input: iri.canonical,
        normalized: normalized,
        scheme: iri.scheme,
        host: iri.host,
      }
      out[:port] = iri.port if iri.port

      if iri.urn?
        out[:path] = urn_rows(iri, classifier, hints)
      else
        out[:path] = path_rows(iri.path_segments, classifier, hints)
        params = iri.query_params
        out[:query] = query_rows(params, classifier) if params && !params.empty?
      end

      out
    end

    # Structured Evidence list for `input`. Each segment + query param
    # contributes one classification Evidence plus zero or more
    # transformation Evidence records (canonical date, IP umbrella
    # collapse, param-name hint, hint suppression).
    #
    # Position + Cluster Evidence are not emitted here — they belong to
    # the corpus-informed trace (Corpus#trace).
    #
    # Returns a flat Array: path/URN segment records in segment order,
    # followed by query-param records in sorted param-name order.
    def evidence_for(input, classifier: SegmentClassifier::DEFAULT, hints: true)
      iri = coerce(input)
      segments = iri.urn? ? urn_parts(iri) : (iri.path_segments || [])
      entries = SegmentHints.derive(segments, classifier)

      records = entries.each_with_index.flat_map do |entry, i|
        segment_evidence(entry, segments, i, classifier, hints)
      end

      params = iri.urn? ? nil : iri.query_params
      if params && !params.empty?
        params.keys.sort.each do |k|
          records.concat(query_param_evidence(k, params[k].to_s, classifier))
        end
      end

      records
    end

    # ── Evidence builders ────────────────────────────────────────────────

    # Evidence for one path/URN segment.
    #
    # entry    - Hash read via :value, :type, :variable and :hint
    #            (presumably one element of SegmentHints.derive — confirm).
    # segments - the full segment list; used to look at the left neighbor.
    # idx      - position of `entry` within `segments`.
    #
    # Always returns the :recognizer record first. Variable segments add
    # either a canonicalization record (date/currency whose canonical
    # form differs) or the placeholder decoration records.
    def segment_evidence(entry, segments, idx, classifier, hints)
      value = entry[:value]
      type = entry[:type]

      records = [
        Evidence.segment(
          index: idx, value: value,
          source: :recognizer,
          payload: { type: type, variable: entry[:variable], hint: entry[:hint] },
        ),
      ]
      return records unless entry[:variable]

      canon = canonical_value(type, value)
      if canon
        # Already-canonical values get no policy record (and no
        # placeholder decoration either — they are emitted verbatim).
        records << canonical_evidence(idx, type, value, canon) if canon != value
      else
        records.concat(placeholder_decoration_evidence(entry, segments, idx, classifier, hints))
      end

      records
    end

    # Extra Evidence for a variable segment that renders as a placeholder:
    # the IP umbrella collapse, and the hint-suppression note emitted when
    # a semantic type keeps its own placeholder instead of a
    # neighbor-derived `<noun>_id` style hint.
    def placeholder_decoration_evidence(entry, segments, idx, classifier, hints)
      out = []
      type = entry[:type]

      out << ip_umbrella_evidence(idx, type, entry[:value]) if IP_TYPES.include?(type)

      if hints && entry[:hint].nil? && !SegmentHints::HINT_ELIGIBLE_TYPES.include?(type)
        if (would_be = would_be_hint(segments, idx, type, classifier))
          display = SegmentClassifier.display_type(type)
          out << Evidence.segment(
            index: idx, value: entry[:value],
            source: :neighbor,
            payload: { rule: :hint_suppression, surfaced: display, would_be: would_be, semantic_type: type },
            notes: [format(HINT_NOTE_TEMPLATE, display, would_be)],
          )
        end
      end

      out
    end

    # Evidence for one query param. Records are indexed by param name.
    #
    # Order: optional :neighbor record when the param name lifts the
    # type, then the :recognizer record carrying the effective type, then
    # at most one :policy record (canonical date/currency or IP umbrella).
    #
    # The `variable` flag is asked of the same `classifier` that produced
    # the type, so a custom classifier is honoured end to end.
    def query_param_evidence(name, value, classifier)
      records = []
      base_type = classifier.classify(value)
      effective = base_type

      if (hint = SegmentClassifier.param_name_hint(name, base_type))
        effective = hint
        records << Evidence.segment(
          index: name, value: value,
          source: :neighbor,
          payload: { rule: :param_name_hint, name: name, before: base_type, after: hint },
          notes: ["param-name hint (`#{name}=`) lifted #{base_type} → #{hint}"],
        )
      end

      records << Evidence.segment(
        index: name, value: value,
        source: :recognizer,
        payload: { type: effective, variable: classifier.variable?(effective) },
      )

      canon = canonical_value(effective, value)
      if canon
        records << canonical_evidence(name, effective, value, canon) if canon != value
      elsif IP_TYPES.include?(effective)
        records << ip_umbrella_evidence(name, effective, value)
      end

      records
    end

    # Canonical spelling of `value` for types that have one (see
    # CANONICAL_RULES). Returns nil for every other type, or whatever the
    # SegmentClassifier canonicalizer returns (nil is treated by callers
    # as "no canonical form").
    def canonical_value(type, value)
      case type
      when :date then SegmentClassifier.canonical_date(value)
      when :currency then SegmentClassifier.canonical_currency(value)
      end
    end

    # :policy record describing a `value` → `canon` rewrite. `type` must
    # be a key of CANONICAL_RULES.
    def canonical_evidence(index, type, value, canon)
      spec = CANONICAL_RULES.fetch(type)
      Evidence.segment(
        index: index, value: value,
        source: :policy,
        payload: { rule: spec[:rule], before: value, after: canon },
        notes: ["#{spec[:label]} (#{value} → #{canon})"],
      )
    end

    # :policy record noting that an ipv4/ipv6 value is reported as `ip`.
    def ip_umbrella_evidence(index, type, value)
      Evidence.segment(
        index: index, value: value,
        source: :policy,
        payload: { rule: :ip_umbrella_collapse, from: type, to: :ip },
        notes: ["ip umbrella collapse (#{type} → ip)"],
      )
    end

    # ── View rendering (Evidence → Trace.for hash) ───────────────────────

    # One rendered row per segment; [] when `segments` is nil or empty.
    def path_rows(segments, classifier, hints)
      return [] if segments.nil? || segments.empty?

      entries = SegmentHints.derive(segments, classifier)
      entries.each_with_index.map do |entry, i|
        ev = segment_evidence(entry, segments, i, classifier, hints)
        render_segment_row(entry, ev, hints)
      end
    end

    # Rows for a URN: its NSS parts are traced exactly like path segments.
    def urn_rows(iri, classifier, hints)
      path_rows(urn_parts(iri), classifier, hints)
    end

    # One rendered row per query param, in sorted param-name order.
    # Values are stringified with #to_s before classification.
    def query_rows(params, classifier)
      params.keys.sort.map do |k|
        v = params[k].to_s
        ev = query_param_evidence(k, v, classifier)
        render_query_row(k, v, ev, classifier: classifier)
      end
    end

    # Renders `{ value, type, output, notes }` for one segment.
    #
    # output is the value itself for non-variable segments, the canonical
    # form when a canonical date/currency record exists, and otherwise a
    # `{placeholder}` named after the hint (when hints are on and the
    # entry has one) or the type's display name.
    def render_segment_row(entry, evidence, hints)
      notes = collect_notes(evidence)
      value = entry[:value]
      type = entry[:type]

      return { value: value, type: type, output: value, notes: notes } unless entry[:variable]

      canon_policy = find_payload(evidence, :canonical_date) || find_payload(evidence, :canonical_currency)
      if canon_policy
        return { value: value, type: type, output: canon_policy[:after], notes: notes }
      end

      placeholder = hints && entry[:hint] ? entry[:hint].to_s : SegmentClassifier.display_type(type).to_s
      { value: value, type: type, output: "{#{placeholder}}", notes: notes }
    end

    # Renders `{ name, value, type, output, notes }` for one query param.
    #
    # The effective type comes from the :recognizer record in `evidence`;
    # `classifier` is only consulted as a fallback when that record is
    # missing, and to decide whether the type renders as a placeholder.
    # It defaults to SegmentClassifier::DEFAULT so existing three-argument
    # callers keep working.
    def render_query_row(name, value, evidence, classifier: SegmentClassifier::DEFAULT)
      notes = collect_notes(evidence)
      cls = find_evidence(evidence, source: :recognizer)
      effective = cls ? cls.payload[:type] : classifier.classify(value)
      canon_policy = find_payload(evidence, :canonical_date) || find_payload(evidence, :canonical_currency)

      output =
        if canon_policy
          canon_policy[:after]
        elsif classifier.variable?(effective)
          "{#{SegmentClassifier.display_type(effective)}}"
        else
          value
        end

      { name: name, value: value, type: effective, output: output, notes: notes }
    end

    # ── Helpers ──────────────────────────────────────────────────────────

    # Passes Identifier instances through; parses anything else.
    def coerce(input)
      input.is_a?(Identifier) ? input : Parser.parse(input)
    end

    # Splits a URN's NSS on its first ":" into at most two parts.
    # Returns [] when the identifier has no NSS, and a one-element array
    # when the NSS contains no ":".
    def urn_parts(iri)
      return [] unless iri.nss
      iri.nss.include?(":") ? iri.nss.split(":", 2) : [iri.nss]
    end

    # All notes from `evidence`, flattened, in record order.
    def collect_notes(evidence)
      evidence.flat_map(&:notes)
    end

    # First record whose source equals `source`, or nil.
    def find_evidence(evidence, source:)
      evidence.find { |r| r.source == source }
    end

    # Payload of the first :policy record for `rule`, or nil.
    def find_payload(evidence, rule)
      r = evidence.find { |e| e.source == :policy && e.payload[:rule] == rule }
      r&.payload
    end

    # The neighbor-derived placeholder name a segment would have received
    # had its type been hint-eligible: the singularized previous segment
    # plus "_uuid" (for :uuid) or "_id". Returns nil for the first segment
    # or when the previous segment does not classify as :literal.
    def would_be_hint(segments, idx, type, classifier)
      return nil if idx.zero?

      prev = segments[idx - 1]
      return nil unless classifier.classify(prev) == :literal

      base = Inflector.singularize(prev)
      suffix = type == :uuid ? "_uuid" : "_id"
      "#{base}#{suffix}"
    end
  end
end
data/lib/iriq/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Iriq
2
- VERSION = "0.2.0"
2
+ VERSION = "0.30.2"
3
3
  end
data/lib/iriq.rb CHANGED
@@ -3,16 +3,33 @@ require "iriq/errors"
3
3
  require "iriq/inflector"
4
4
  require "iriq/identifier"
5
5
  require "iriq/parser"
6
+ require "iriq/specificity"
7
+ require "iriq/recognizer"
8
+ require "iriq/recognizers/uuid"
9
+ require "iriq/recognizers/date"
10
+ require "iriq/recognizers/integer"
6
11
  require "iriq/segment_classifier"
7
12
  require "iriq/segment_hints"
13
+ require "iriq/shape"
8
14
  require "iriq/path_shape"
9
15
  require "iriq/normalizer"
10
16
  require "iriq/explanation"
17
+ require "iriq/evidence"
18
+ require "iriq/trace"
11
19
  require "iriq/cluster"
12
20
  require "iriq/clusterer"
13
21
  require "iriq/position_stats"
22
+ require "set"
23
+
14
24
  require "iriq/observation"
25
+ require "iriq/position"
26
+ require "iriq/event"
27
+ require "iriq/reducer"
28
+ require "iriq/registrable_domain"
15
29
  require "iriq/storage"
30
+ require "iriq/recognizer_proposal"
31
+ require "iriq/synthesized_recognizer"
32
+ require "iriq/cross_host_shape"
16
33
  require "iriq/corpus"
17
34
  require "iriq/extractor"
18
35
  require "iriq/cli"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iriq
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.30.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Pepper
@@ -92,6 +92,10 @@ files:
92
92
  - LICENSE.txt
93
93
  - Makefile
94
94
  - README.md
95
+ - completions/_iriq
96
+ - completions/iriq.bash
97
+ - docs/ARCHITECTURE.md
98
+ - docs/ROADMAP.md
95
99
  - exe/iriq
96
100
  - iriq.gemspec
97
101
  - lib/iriq.rb
@@ -99,7 +103,10 @@ files:
99
103
  - lib/iriq/cluster.rb
100
104
  - lib/iriq/clusterer.rb
101
105
  - lib/iriq/corpus.rb
106
+ - lib/iriq/cross_host_shape.rb
102
107
  - lib/iriq/errors.rb
108
+ - lib/iriq/event.rb
109
+ - lib/iriq/evidence.rb
103
110
  - lib/iriq/explanation.rb
104
111
  - lib/iriq/extractor.rb
105
112
  - lib/iriq/identifier.rb
@@ -108,13 +115,25 @@ files:
108
115
  - lib/iriq/observation.rb
109
116
  - lib/iriq/parser.rb
110
117
  - lib/iriq/path_shape.rb
118
+ - lib/iriq/position.rb
111
119
  - lib/iriq/position_stats.rb
120
+ - lib/iriq/recognizer.rb
121
+ - lib/iriq/recognizer_proposal.rb
122
+ - lib/iriq/recognizers/date.rb
123
+ - lib/iriq/recognizers/integer.rb
124
+ - lib/iriq/recognizers/uuid.rb
125
+ - lib/iriq/reducer.rb
126
+ - lib/iriq/registrable_domain.rb
112
127
  - lib/iriq/segment_classifier.rb
113
128
  - lib/iriq/segment_hints.rb
129
+ - lib/iriq/shape.rb
130
+ - lib/iriq/specificity.rb
114
131
  - lib/iriq/storage.rb
115
132
  - lib/iriq/storage/json.rb
116
133
  - lib/iriq/storage/memory.rb
117
134
  - lib/iriq/storage/sqlite.rb
135
+ - lib/iriq/synthesized_recognizer.rb
136
+ - lib/iriq/trace.rb
118
137
  - lib/iriq/version.rb
119
138
  homepage: https://github.com/dpep/iriq
120
139
  licenses:
@@ -127,14 +146,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
127
146
  requirements:
128
147
  - - ">="
129
148
  - !ruby/object:Gem::Version
130
- version: '3.2'
149
+ version: '3.4'
131
150
  required_rubygems_version: !ruby/object:Gem::Requirement
132
151
  requirements:
133
152
  - - ">="
134
153
  - !ruby/object:Gem::Version
135
154
  version: '0'
136
155
  requirements: []
137
- rubygems_version: 3.6.9
156
+ rubygems_version: 4.0.11
138
157
  specification_version: 4
139
158
  summary: IRI extraction, normalization, and clustering.
140
159
  test_files: []