fbi-proxy 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/rs/routes.rs ADDED
@@ -0,0 +1,976 @@
1
+ //! Rule-based routing engine for fbi-proxy.
2
+ //!
3
+ //! This module implements a configurable, placeholder-based rule system
4
+ //! that replaces (eventually) the hardcoded `parse_host` logic in
5
+ //! `rs/fbi-proxy.rs`. Routes are described declaratively (e.g. in YAML)
6
+ //! as a `match` pattern + a `target` template + optional `headers`
7
+ //! templates. The engine compiles each rule into a regular expression
8
+ //! and, at request time, picks the first rule whose pattern matches
9
+ //! the incoming host, then expands the templates using the captured
10
+ //! placeholder values.
11
+ //!
12
+ //! # Placeholder syntax
13
+ //!
14
+ //! Placeholders in patterns and templates use brace syntax:
15
+ //!
16
+ //! * `{name}` — matches one host segment: `[^.]+`
17
+ //! * `{name:int}` — matches one numeric segment: `\d+`
18
+ //! * `{name:slug}` — matches `[a-z0-9-]+`
19
+ //! * `{name:multi}` — matches one or more dot-separated segments:
20
+ //! `[^.]+(\.[^.]+)*`. Use this for DNS-passthrough
21
+ //! patterns like `{upstream:multi}.{domain}` that
22
+ //! need to capture e.g. `github.com` as one value.
23
+ //!
24
+ //! A given placeholder name can appear in both the `match` pattern
25
+ //! (where it captures) and in the `target` / `headers` templates
26
+ //! (where it is substituted from the corresponding capture).
27
+ //!
28
+ //! Literal characters in patterns (dots, dashes, etc.) are escaped
29
+ //! with `regex::escape` and matched verbatim; the whole pattern is
30
+ //! implicitly anchored with `^...$`.
31
+ //!
32
+ //! # `{domain}` and multi-dot subdomain semantics
33
+ //!
34
+ //! `{domain}` **is** special-cased by this engine when it is written
35
+ //! without an explicit kind: it matches two or more dot-separated
36
+ //! segments (e.g. `fbi.com`, `fbi.example.com`) but never a single
37
+ //! bare segment such as `com`. Every other untyped placeholder
38
+ //! (`{host}`, `{prefix}`, ...) matches exactly one dot-free segment.
39
+ //!
40
+ //! This means that in a pattern like `{prefix}.{host}.{domain}` the
41
+ //! leading placeholders each take one segment and `{domain}` takes
42
+ //! everything that remains. For a host like `a.b.c.fbi.com` the match
43
+ //! is `prefix=a`, `host=b`, `domain=c.fbi.com`, while `myserver.fbi.com`
44
+ //! does not match at all, because `{domain}` needs at least two
45
+ //! segments, which leaves only one for `{prefix}` and `{host}`.
46
+ //! An explicit kind (e.g. `{domain:slug}`) disables the special case.
47
+ //!
48
+ //! Callers that need the domain pinned to one specific value should
49
+ //! either:
50
+ //! 1. Pass it as `default_domain` to `match_host_with_domain`, which
51
+ //! rejects hosts that do not end with that domain, or
52
+ //! 2. Encode the domain literally in the pattern
53
+ //! (e.g. `{prefix}.{host}.fbi.example.com`).
54
+ //!
55
+ //! `match_host_with_domain(routes, host, Some("fbi.example.com"))` does
56
+ //! not strip or re-inject anything: it only checks the suffix, and the
57
+ //! full host is then matched against each pattern, so `{domain}` is
58
+ //! whatever the regex captured.
59
+
60
+ use regex::Regex;
61
+ use serde::Deserialize;
62
+ use std::collections::HashMap;
63
+ use std::fmt;
64
+
65
/// Placeholder kind — selects the regex fragment a placeholder compiles to.
///
/// The kind comes from the optional `:kind` suffix of a `{name[:kind]}`
/// placeholder in a match pattern.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PlaceholderKind {
    /// `{name}` — matches one host segment (no dot): `[^.]+`.
    Any,
    /// `{name:int}` — matches one or more digits: `\d+`.
    Int,
    /// `{name:slug}` — matches lowercase letters, digits and dashes:
    /// `[a-z0-9-]+`.
    Slug,
    /// `{name:multi}` — matches one or more dot-separated segments.
    /// Use for DNS-passthrough patterns (e.g. `{upstream:multi}.fbi.com`
    /// capturing `github.com` as one value).
    Multi,
}
79
+
80
+ impl PlaceholderKind {
81
+ fn regex_fragment(self) -> &'static str {
82
+ match self {
83
+ PlaceholderKind::Any => "[^.]+",
84
+ PlaceholderKind::Int => r"\d+",
85
+ PlaceholderKind::Slug => "[a-z0-9-]+",
86
+ PlaceholderKind::Multi => r"[^.]+(?:\.[^.]+)*",
87
+ }
88
+ }
89
+ }
90
+
91
/// Looks up the regex override for placeholder names that must span more
/// than one dot-free segment.
///
/// Only `domain` has an override today: it matches two or more
/// dot-separated labels (`fbi.com`, `fbi.example.com`) and never a single
/// bare label such as `com`. That keeps the default rule ordering
/// unambiguous: in `{prefix}.{host}.{domain}` the trailing `{domain}`
/// cannot shrink to one label, so `myserver.fbi.com` is not mis-read as
/// `prefix=myserver, host=fbi, domain=com`.
///
/// Returns `None` for every other name.
fn special_regex_fragment(name: &str) -> Option<&'static str> {
    let is_domain = name == "domain";
    is_domain.then_some(r"[a-zA-Z0-9\-]+(?:\.[a-zA-Z0-9\-]+)+")
}
106
+
107
/// A single named placeholder declared in a route's match pattern.
#[derive(Debug, Clone)]
pub struct Placeholder {
    /// Placeholder name as written between the braces (without `:kind`).
    /// It doubles as the name of the regex capture group.
    pub name: String,
    /// Kind parsed from the optional `:kind` suffix.
    pub kind: PlaceholderKind,
}
113
+
114
/// User-supplied route configuration (e.g. one entry of `routes.yaml`).
#[derive(Debug, Clone, Deserialize)]
pub struct RouteConfig {
    /// Route name; used in error messages and reported on a match.
    pub name: String,
    /// Pattern matched against the Host header (without port).
    /// E.g. `"{port:int}.{domain}"`.
    // `match` is a Rust keyword, hence the raw identifier plus serde rename.
    #[serde(rename = "match")]
    pub r#match: String,
    /// Target template, e.g. `"127.0.0.1:{port}"`.
    pub target: String,
    /// Header templates. The special key `"Host"` (case-insensitive)
    /// is surfaced separately on `RouteHit::host_header`.
    /// May be omitted from the config entirely.
    #[serde(default)]
    pub headers: Option<HashMap<String, String>>,
}
129
+
130
/// Top-level shape of `routes.yaml`.
#[derive(Debug, Clone, Deserialize)]
pub struct RoutesFile {
    /// Schema version; filled in by `default_version` when the key is absent.
    #[serde(default = "default_version")]
    pub version: u32,
    /// Route entries in file order.
    pub routes: Vec<RouteConfig>,
}
137
+
138
/// Schema version assumed when a routes document has no `version` key.
fn default_version() -> u32 {
    const IMPLICIT_VERSION: u32 = 1;
    IMPLICIT_VERSION
}
141
+
142
/// Parse a `routes.yaml`-style document into a [`RoutesFile`].
///
/// This only deserializes; patterns and templates are not validated
/// until the routes are passed to `compile`.
///
/// # Errors
///
/// Returns the `serde_yaml` error if `src` cannot be deserialized into
/// a `RoutesFile`.
pub fn parse_yaml(src: &str) -> Result<RoutesFile, serde_yaml::Error> {
    serde_yaml::from_str(src)
}
146
+
147
/// A compiled route — regex + templates — ready to evaluate per request.
#[derive(Debug, Clone)]
pub struct CompiledRoute {
    /// Route name copied from the configuration.
    pub name: String,
    /// Regex built from the match pattern, anchored with `^` and `$`,
    /// with one named capture group per placeholder.
    pub pattern: Regex,
    /// Placeholders declared in the match pattern, in pattern order.
    pub placeholders: Vec<Placeholder>,
    /// Unexpanded target template, exactly as configured.
    pub target_template: String,
    /// Unexpanded header templates, keyed by header name as configured.
    pub header_templates: HashMap<String, String>,
}
156
+
157
/// Result of a successful match: the winning route with all of its
/// templates expanded from the captured placeholder values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RouteHit {
    /// Name of the route that matched.
    pub route_name: String,
    /// Expanded `target` template (e.g. `"api:3001"`).
    pub target: String,
    /// Expanded `Host` header from the `headers` map, if any.
    pub host_header: Option<String>,
    /// Other expanded headers, excluding `Host` (case-insensitive).
    pub other_headers: HashMap<String, String>,
}
168
+
169
/// Error produced while compiling a route configuration.
///
/// Every variant carries the name of the offending route in `route`.
#[derive(Debug, Clone)]
pub enum CompileError {
    /// A placeholder spec was malformed, e.g. `{na me}` or `{:int}`.
    /// `placeholder` is the text between the braces; `reason` says what
    /// is wrong with it.
    InvalidPlaceholder { route: String, placeholder: String, reason: String },
    /// An unknown placeholder kind, e.g. `{name:foo}`.
    UnknownKind { route: String, name: String, kind: String },
    /// The same placeholder name was declared twice in the same pattern.
    DuplicatePlaceholder { route: String, name: String },
    /// The generated regex failed to compile (very unlikely — usually
    /// an indication of weird literal characters that escaped wrong).
    /// `source` is the regex error rendered as text.
    InvalidRegex { route: String, source: String },
    /// A `{name}` appeared in the target/header template but was never
    /// declared in the match pattern. `location` names the template.
    UndeclaredPlaceholder { route: String, name: String, location: String },
    /// Unbalanced braces in pattern or template. `location` names which.
    UnbalancedBraces { route: String, location: String },
}
187
+
188
+ impl fmt::Display for CompileError {
189
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
190
+ match self {
191
+ CompileError::InvalidPlaceholder { route, placeholder, reason } => {
192
+ write!(f, "route '{}': invalid placeholder '{{{}}}': {}", route, placeholder, reason)
193
+ }
194
+ CompileError::UnknownKind { route, name, kind } => {
195
+ write!(f, "route '{}': unknown placeholder kind ':{}' for '{{{}}}' (expected int|slug|multi or none)", route, kind, name)
196
+ }
197
+ CompileError::DuplicatePlaceholder { route, name } => {
198
+ write!(f, "route '{}': placeholder '{{{}}}' declared twice in match pattern", route, name)
199
+ }
200
+ CompileError::InvalidRegex { route, source } => {
201
+ write!(f, "route '{}': internal regex compile error: {}", route, source)
202
+ }
203
+ CompileError::UndeclaredPlaceholder { route, name, location } => {
204
+ write!(f, "route '{}': placeholder '{{{}}}' used in {} but never declared in match pattern", route, name, location)
205
+ }
206
+ CompileError::UnbalancedBraces { route, location } => {
207
+ write!(f, "route '{}': unbalanced braces in {}", route, location)
208
+ }
209
+ }
210
+ }
211
+ }
212
+
213
// Marker impl so `CompileError` can be used wherever a
// `std::error::Error` is expected; the default methods are sufficient.
impl std::error::Error for CompileError {}
214
+
215
+ // ---------------------------------------------------------------------------
216
+ // Parsing helpers
217
+ // ---------------------------------------------------------------------------
218
+
219
/// A token of a parsed pattern / template string.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// A run of text outside any braces, kept verbatim.
    Literal(String),
    /// A `{name}` or `{name:kind}` placeholder. `kind` is the raw,
    /// unvalidated text after the first `:`, or `None` when there is no `:`.
    Placeholder { name: String, kind: Option<String> },
}
225
+
226
+ /// Tokenize a `{name[:kind]}`-style template. Returns the token list
227
+ /// or `Err(UnbalancedBraces)` on malformed input.
228
+ fn tokenize(s: &str, route: &str, location: &str) -> Result<Vec<Token>, CompileError> {
229
+ let mut out = Vec::new();
230
+ let mut buf = String::new();
231
+ let mut chars = s.chars().peekable();
232
+
233
+ while let Some(c) = chars.next() {
234
+ if c == '{' {
235
+ // flush literal
236
+ if !buf.is_empty() {
237
+ out.push(Token::Literal(std::mem::take(&mut buf)));
238
+ }
239
+ // collect until '}'
240
+ let mut spec = String::new();
241
+ let mut closed = false;
242
+ while let Some(&nc) = chars.peek() {
243
+ chars.next();
244
+ if nc == '}' {
245
+ closed = true;
246
+ break;
247
+ }
248
+ spec.push(nc);
249
+ }
250
+ if !closed {
251
+ return Err(CompileError::UnbalancedBraces {
252
+ route: route.to_string(),
253
+ location: location.to_string(),
254
+ });
255
+ }
256
+ // parse "name" or "name:kind"
257
+ let (name, kind) = match spec.split_once(':') {
258
+ Some((n, k)) => (n.to_string(), Some(k.to_string())),
259
+ None => (spec.clone(), None),
260
+ };
261
+ out.push(Token::Placeholder { name, kind });
262
+ } else if c == '}' {
263
+ return Err(CompileError::UnbalancedBraces {
264
+ route: route.to_string(),
265
+ location: location.to_string(),
266
+ });
267
+ } else {
268
+ buf.push(c);
269
+ }
270
+ }
271
+ if !buf.is_empty() {
272
+ out.push(Token::Literal(buf));
273
+ }
274
+ Ok(out)
275
+ }
276
+
277
+ fn parse_kind(route: &str, name: &str, kind: Option<&str>) -> Result<PlaceholderKind, CompileError> {
278
+ match kind {
279
+ None | Some("") => Ok(PlaceholderKind::Any),
280
+ Some("int") => Ok(PlaceholderKind::Int),
281
+ Some("slug") => Ok(PlaceholderKind::Slug),
282
+ Some("multi") => Ok(PlaceholderKind::Multi),
283
+ Some(other) => Err(CompileError::UnknownKind {
284
+ route: route.to_string(),
285
+ name: name.to_string(),
286
+ kind: other.to_string(),
287
+ }),
288
+ }
289
+ }
290
+
291
+ fn validate_name(route: &str, raw_spec: &str, name: &str) -> Result<(), CompileError> {
292
+ if name.is_empty() {
293
+ return Err(CompileError::InvalidPlaceholder {
294
+ route: route.to_string(),
295
+ placeholder: raw_spec.to_string(),
296
+ reason: "empty placeholder name".to_string(),
297
+ });
298
+ }
299
+ let first = name.chars().next().unwrap();
300
+ if !(first.is_ascii_alphabetic() || first == '_') {
301
+ return Err(CompileError::InvalidPlaceholder {
302
+ route: route.to_string(),
303
+ placeholder: raw_spec.to_string(),
304
+ reason: "name must start with a letter or '_'".to_string(),
305
+ });
306
+ }
307
+ for c in name.chars() {
308
+ if !(c.is_ascii_alphanumeric() || c == '_') {
309
+ return Err(CompileError::InvalidPlaceholder {
310
+ route: route.to_string(),
311
+ placeholder: raw_spec.to_string(),
312
+ reason: format!("name contains invalid character '{}'", c),
313
+ });
314
+ }
315
+ }
316
+ Ok(())
317
+ }
318
+
319
+ // ---------------------------------------------------------------------------
320
+ // Compile
321
+ // ---------------------------------------------------------------------------
322
+
323
+ /// Compile a list of `RouteConfig`s into ready-to-use `CompiledRoute`s.
324
+ ///
325
+ /// Returns the first error encountered.
326
+ pub fn compile(routes: Vec<RouteConfig>) -> Result<Vec<CompiledRoute>, CompileError> {
327
+ let mut out = Vec::with_capacity(routes.len());
328
+ for r in routes {
329
+ out.push(compile_one(r)?);
330
+ }
331
+ Ok(out)
332
+ }
333
+
334
+ fn compile_one(cfg: RouteConfig) -> Result<CompiledRoute, CompileError> {
335
+ let route_name = cfg.name.clone();
336
+ let tokens = tokenize(&cfg.r#match, &route_name, "match pattern")?;
337
+
338
+ let mut declared: Vec<Placeholder> = Vec::new();
339
+ let mut regex_src = String::from("^");
340
+ for tok in &tokens {
341
+ match tok {
342
+ Token::Literal(lit) => {
343
+ regex_src.push_str(&regex::escape(lit));
344
+ }
345
+ Token::Placeholder { name, kind } => {
346
+ let raw_spec = match kind {
347
+ Some(k) => format!("{}:{}", name, k),
348
+ None => name.clone(),
349
+ };
350
+ validate_name(&route_name, &raw_spec, name)?;
351
+ let parsed_kind = parse_kind(&route_name, name, kind.as_deref())?;
352
+ if declared.iter().any(|p| p.name == *name) {
353
+ return Err(CompileError::DuplicatePlaceholder {
354
+ route: route_name,
355
+ name: name.clone(),
356
+ });
357
+ }
358
+ declared.push(Placeholder { name: name.clone(), kind: parsed_kind });
359
+ regex_src.push('(');
360
+ regex_src.push_str("?P<");
361
+ regex_src.push_str(name);
362
+ regex_src.push('>');
363
+ // If the user did not specify an explicit kind (e.g.
364
+ // `{domain}` not `{domain:slug}`) AND the name is one
365
+ // of the well-known multi-segment names, broaden the
366
+ // fragment to allow dots. This is what makes
367
+ // `{port:int}.{domain}` work for `3000.fbi.com`.
368
+ if kind.is_none() {
369
+ if let Some(frag) = special_regex_fragment(name) {
370
+ regex_src.push_str(frag);
371
+ } else {
372
+ regex_src.push_str(parsed_kind.regex_fragment());
373
+ }
374
+ } else {
375
+ regex_src.push_str(parsed_kind.regex_fragment());
376
+ }
377
+ regex_src.push(')');
378
+ }
379
+ }
380
+ }
381
+ regex_src.push('$');
382
+
383
+ let pattern = Regex::new(&regex_src).map_err(|e| CompileError::InvalidRegex {
384
+ route: route_name.clone(),
385
+ source: e.to_string(),
386
+ })?;
387
+
388
+ // Validate target template references known placeholders only.
389
+ let target_tokens = tokenize(&cfg.target, &route_name, "target template")?;
390
+ for tok in &target_tokens {
391
+ if let Token::Placeholder { name, .. } = tok {
392
+ validate_name(&route_name, name, name)?;
393
+ if !declared.iter().any(|p| p.name == *name) {
394
+ return Err(CompileError::UndeclaredPlaceholder {
395
+ route: route_name,
396
+ name: name.clone(),
397
+ location: "target template".to_string(),
398
+ });
399
+ }
400
+ }
401
+ }
402
+
403
+ let mut header_templates: HashMap<String, String> = HashMap::new();
404
+ if let Some(headers) = cfg.headers {
405
+ for (k, v) in headers {
406
+ let header_tokens = tokenize(&v, &route_name, &format!("header '{}'", k))?;
407
+ for tok in &header_tokens {
408
+ if let Token::Placeholder { name, .. } = tok {
409
+ validate_name(&route_name, name, name)?;
410
+ if !declared.iter().any(|p| p.name == *name) {
411
+ return Err(CompileError::UndeclaredPlaceholder {
412
+ route: route_name.clone(),
413
+ name: name.clone(),
414
+ location: format!("header '{}'", k),
415
+ });
416
+ }
417
+ }
418
+ }
419
+ header_templates.insert(k, v);
420
+ }
421
+ }
422
+
423
+ Ok(CompiledRoute {
424
+ name: route_name,
425
+ pattern,
426
+ placeholders: declared,
427
+ target_template: cfg.target,
428
+ header_templates,
429
+ })
430
+ }
431
+
432
+ // ---------------------------------------------------------------------------
433
+ // Match
434
+ // ---------------------------------------------------------------------------
435
+
436
/// Strip a trailing `:port` from a host string. Used for normalization
/// before matching.
///
/// * A bracketed IPv6 literal (`[::1]`, `[::1]:8080`) is cut right after
///   the closing `]`, so the colons inside the address are never mistaken
///   for a port separator.
/// * Otherwise the text after the last `:` is removed only when it is
///   empty or all ASCII digits and the part before it contains no further
///   `:`. A bare IPv6 address (`::1`) and non-numeric suffixes are
///   therefore returned unchanged.
fn strip_port(host: &str) -> &str {
    if host.starts_with('[') {
        return match host.find(']') {
            Some(end) => &host[..=end],
            // Unterminated bracket: leave the malformed value alone.
            None => host,
        };
    }
    match host.rsplit_once(':') {
        Some((name, port))
            if !name.contains(':') && port.bytes().all(|b| b.is_ascii_digit()) =>
        {
            name
        }
        _ => host,
    }
}

/// Strip trailing slash if present (some clients include one).
fn strip_trailing_slash(host: &str) -> &str {
    host.strip_suffix('/').unwrap_or(host)
}

/// Canonicalizes a Host header value for matching: drops one trailing
/// slash, drops the port, and lowercases ASCII letters.
fn normalize(host: &str) -> String {
    // The slash goes first so that `host:8080/` still exposes a purely
    // numeric port to `strip_port`.
    // Host header is case-insensitive per RFC 7230 §5.4.
    strip_port(strip_trailing_slash(host)).to_ascii_lowercase()
}
454
+
455
/// Substitutes every `{name}` / `{name:kind}` in `template` with the
/// matching value from `captures`.
///
/// No validation happens here: the `:kind` part is ignored, a name with
/// no captured value expands to nothing, and an unterminated `{` swallows
/// the rest of the template as its name. `compile()` is expected to have
/// rejected such templates already.
fn expand(template: &str, captures: &HashMap<String, String>) -> String {
    let mut rendered = String::with_capacity(template.len());
    let mut rest = template;

    while let Some(open) = rest.find('{') {
        // Literal text before the placeholder is copied through untouched.
        rendered.push_str(&rest[..open]);

        let after_open = &rest[open + 1..];
        let (spec, tail) = match after_open.find('}') {
            Some(close) => (&after_open[..close], &after_open[close + 1..]),
            None => (after_open, ""),
        };

        // Drop the optional `:kind` suffix before the lookup.
        let name = spec.split_once(':').map_or(spec, |(bare, _)| bare);
        if let Some(value) = captures.get(name) {
            rendered.push_str(value);
        }
        rest = tail;
    }

    rendered.push_str(rest);
    rendered
}
488
+
489
/// Try to match a host against the compiled routes. Returns the first
/// match (top-to-bottom order in the config), or `None` if no route
/// matches.
///
/// Equivalent to `match_host_with_domain` with no default domain, i.e.
/// without any suffix filter.
pub fn match_host(routes: &[CompiledRoute], host: &str) -> Option<RouteHit> {
    match_host_with_domain(routes, host, None)
}
494
+
495
+ /// Like `match_host`, but if `default_domain` is `Some("fbi.com")`,
496
+ /// the host must end with `.fbi.com` (or be exactly `fbi.com`),
497
+ /// otherwise no match is produced. The full host (including the
498
+ /// domain suffix) is then matched against each compiled route's
499
+ /// pattern, so `{domain}` in the pattern naturally captures the
500
+ /// multi-dot suffix.
501
+ ///
502
+ /// If `default_domain` is `None`, the host is matched as-is.
503
+ pub fn match_host_with_domain(
504
+ routes: &[CompiledRoute],
505
+ host: &str,
506
+ default_domain: Option<&str>,
507
+ ) -> Option<RouteHit> {
508
+ let host = normalize(host);
509
+
510
+ if let Some(domain) = default_domain {
511
+ if !domain.is_empty() {
512
+ let domain_lc = domain.to_ascii_lowercase();
513
+ if host != domain_lc && !host.ends_with(&format!(".{}", domain_lc)) {
514
+ return None;
515
+ }
516
+ }
517
+ }
518
+
519
+ for route in routes {
520
+ if let Some(caps) = route.pattern.captures(&host) {
521
+ let mut values: HashMap<String, String> = HashMap::new();
522
+ for p in &route.placeholders {
523
+ if let Some(m) = caps.name(&p.name) {
524
+ values.insert(p.name.clone(), m.as_str().to_string());
525
+ }
526
+ }
527
+
528
+ let target = expand(&route.target_template, &values);
529
+
530
+ let mut host_header: Option<String> = None;
531
+ let mut other_headers: HashMap<String, String> = HashMap::new();
532
+ for (k, tmpl) in &route.header_templates {
533
+ let v = expand(tmpl, &values);
534
+ if k.eq_ignore_ascii_case("host") {
535
+ host_header = Some(v);
536
+ } else {
537
+ other_headers.insert(k.clone(), v);
538
+ }
539
+ }
540
+
541
+ return Some(RouteHit {
542
+ route_name: route.name.clone(),
543
+ target,
544
+ host_header,
545
+ other_headers,
546
+ });
547
+ }
548
+ }
549
+ None
550
+ }
551
+
552
+ // ---------------------------------------------------------------------------
553
+ // Tests
554
+ // ---------------------------------------------------------------------------
555
+
556
#[cfg(test)]
mod tests {
    use super::*;

    /// Compiles the four default rules, in priority order: port-as-host,
    /// host--port, subdomain hoisting, direct forward.
    fn default_routes() -> Vec<CompiledRoute> {
        let configs = vec![
            RouteConfig {
                name: "port-as-host".into(),
                r#match: "{port:int}.{domain}".into(),
                target: "127.0.0.1:{port}".into(),
                headers: None,
            },
            RouteConfig {
                name: "host-double-dash-port".into(),
                r#match: "{host}--{port:int}.{domain}".into(),
                target: "{host}:{port}".into(),
                headers: Some({
                    let mut h = HashMap::new();
                    h.insert("Host".into(), "{host}".into());
                    h
                }),
            },
            RouteConfig {
                name: "subdomain-hoisting".into(),
                r#match: "{prefix}.{host}.{domain}".into(),
                target: "{host}:80".into(),
                headers: Some({
                    let mut h = HashMap::new();
                    h.insert("Host".into(), "{prefix}".into());
                    h
                }),
            },
            RouteConfig {
                name: "direct-forward".into(),
                r#match: "{host}.{domain}".into(),
                target: "{host}:80".into(),
                headers: Some({
                    let mut h = HashMap::new();
                    h.insert("Host".into(), "{host}".into());
                    h
                }),
            },
        ];
        compile(configs).expect("compile default routes")
    }

    /// Most default-rule tests go through the `fbi.com` domain filter,
    /// which is the way these rules are intended to be used. The filter
    /// rejects hosts outside that domain before any rule is tried; it
    /// does not strip anything, so the rules still see the full host.
    fn m(routes: &[CompiledRoute], host: &str) -> Option<RouteHit> {
        match_host_with_domain(routes, host, Some("fbi.com"))
    }

    #[test]
    fn empty_routes_no_match() {
        let hit = match_host(&[], "anything.fbi.com");
        assert!(hit.is_none());
    }

    #[test]
    fn port_as_host_matches() {
        let routes = default_routes();
        let hit = m(&routes, "3000.fbi.com").expect("should match");
        assert_eq!(hit.route_name, "port-as-host");
        assert_eq!(hit.target, "127.0.0.1:3000");
        assert_eq!(hit.host_header, None);
    }

    #[test]
    fn host_double_dash_port_matches() {
        let routes = default_routes();
        let hit = m(&routes, "api--3001.fbi.com").expect("should match");
        assert_eq!(hit.route_name, "host-double-dash-port");
        assert_eq!(hit.target, "api:3001");
        assert_eq!(hit.host_header.as_deref(), Some("api"));
    }

    #[test]
    fn subdomain_hoisting_matches() {
        let routes = default_routes();
        let hit = m(&routes, "admin.app.fbi.com").expect("should match");
        assert_eq!(hit.route_name, "subdomain-hoisting");
        assert_eq!(hit.target, "app:80");
        assert_eq!(hit.host_header.as_deref(), Some("admin"));
    }

    #[test]
    fn direct_forward_matches() {
        let routes = default_routes();
        let hit = m(&routes, "myserver.fbi.com").expect("should match");
        assert_eq!(hit.route_name, "direct-forward");
        assert_eq!(hit.target, "myserver:80");
        assert_eq!(hit.host_header.as_deref(), Some("myserver"));
    }

    #[test]
    fn port_in_host_is_stripped_before_match() {
        let routes = default_routes();
        let hit = m(&routes, "myserver.fbi.com:8080").expect("should match");
        assert_eq!(hit.route_name, "direct-forward");
        assert_eq!(hit.target, "myserver:80");
    }

    #[test]
    fn trailing_slash_stripped() {
        let routes = default_routes();
        let hit = m(&routes, "3000.fbi.com/").expect("should match");
        assert_eq!(hit.route_name, "port-as-host");
    }

    #[test]
    fn host_header_is_case_insensitive() {
        let routes = default_routes();
        let hit = m(&routes, "API--3001.FBI.COM").expect("should match");
        assert_eq!(hit.route_name, "host-double-dash-port");
        assert_eq!(hit.target, "api:3001");
    }

    #[test]
    fn multi_dot_subdomain_assigns_domain_greedily() {
        // For `a.b.c.fbi.com` against `{prefix}.{host}.{domain}`, the
        // regex anchors left-to-right: {prefix} and {host} each
        // capture one dot-free segment, and {domain} (which has the
        // special multi-dot fragment) captures the rest.
        //
        // So the match is: prefix=a, host=b, domain=c.fbi.com.
        //
        // This may or may not be what the user intends. Document this
        // ambiguity: if the user wants a different split of the
        // segments, they need a different pattern (with explicit
        // literals for the trailing domain). No domain filter is
        // passed here on purpose.
        let routes = default_routes();
        let hit = match_host(&routes, "a.b.c.fbi.com").expect("should match");
        assert_eq!(hit.route_name, "subdomain-hoisting");
        // host=b, target={host}:80 = b:80
        assert_eq!(hit.target, "b:80");
        // Host header = {prefix} = "a"
        assert_eq!(hit.host_header.as_deref(), Some("a"));
    }

    #[test]
    fn multi_dot_subdomain_with_domain_filter_is_unambiguous() {
        // Passing the default domain (`fbi.com`) only checks that the
        // host ends with that suffix; it does not constrain what the
        // `{domain}` placeholder captures, and the regex is unchanged.
        // For the typical host shape `prefix.host.fbi.com` there is
        // exactly one way to split the four segments (one each for
        // {prefix} and {host}, two for {domain}), so the result is
        // unambiguous.
        let routes = default_routes();
        // admin.app.fbi.com -> subdomain-hoisting (prefix=admin, host=app, domain=fbi.com)
        let hit = match_host_with_domain(&routes, "admin.app.fbi.com", Some("fbi.com"))
            .expect("should match");
        assert_eq!(hit.route_name, "subdomain-hoisting");
        assert_eq!(hit.target, "app:80");
        assert_eq!(hit.host_header.as_deref(), Some("admin"));
    }

    #[test]
    fn first_match_wins() {
        let routes = compile(vec![
            RouteConfig {
                name: "first".into(),
                r#match: "{x}.{y}".into(),
                target: "first-target".into(),
                headers: None,
            },
            RouteConfig {
                name: "second".into(),
                r#match: "{x}.{y}".into(),
                target: "second-target".into(),
                headers: None,
            },
        ])
        .unwrap();
        let hit = match_host(&routes, "a.b").expect("should match");
        assert_eq!(hit.route_name, "first");
        assert_eq!(hit.target, "first-target");
    }

    #[test]
    fn unknown_placeholder_kind_errors() {
        let err = compile(vec![RouteConfig {
            name: "bad".into(),
            r#match: "{port:zzz}.com".into(),
            target: "x".into(),
            headers: None,
        }])
        .unwrap_err();
        match err {
            CompileError::UnknownKind { kind, .. } => assert_eq!(kind, "zzz"),
            e => panic!("expected UnknownKind, got {:?}", e),
        }
    }

    #[test]
    fn unbalanced_braces_in_pattern_errors() {
        let err = compile(vec![RouteConfig {
            name: "bad".into(),
            r#match: "{port".into(),
            target: "x".into(),
            headers: None,
        }])
        .unwrap_err();
        match err {
            CompileError::UnbalancedBraces { location, .. } => {
                assert!(location.contains("match"))
            }
            e => panic!("expected UnbalancedBraces, got {:?}", e),
        }
    }

    #[test]
    fn duplicate_placeholder_errors() {
        let err = compile(vec![RouteConfig {
            name: "bad".into(),
            r#match: "{x}.{x}".into(),
            target: "y".into(),
            headers: None,
        }])
        .unwrap_err();
        match err {
            CompileError::DuplicatePlaceholder { name, .. } => assert_eq!(name, "x"),
            e => panic!("expected DuplicatePlaceholder, got {:?}", e),
        }
    }

    #[test]
    fn undeclared_placeholder_in_target_errors() {
        let err = compile(vec![RouteConfig {
            name: "bad".into(),
            r#match: "{x}.{y}".into(),
            target: "{z}".into(),
            headers: None,
        }])
        .unwrap_err();
        match err {
            CompileError::UndeclaredPlaceholder { name, location, .. } => {
                assert_eq!(name, "z");
                assert!(location.contains("target"));
            }
            e => panic!("expected UndeclaredPlaceholder, got {:?}", e),
        }
    }

    #[test]
    fn invalid_placeholder_name_errors() {
        let err = compile(vec![RouteConfig {
            name: "bad".into(),
            r#match: "{1foo}".into(),
            target: "x".into(),
            headers: None,
        }])
        .unwrap_err();
        match err {
            CompileError::InvalidPlaceholder { .. } => {}
            e => panic!("expected InvalidPlaceholder, got {:?}", e),
        }
    }

    #[test]
    fn int_kind_rejects_non_numeric() {
        let routes = default_routes();
        // "abc.fbi.com" should NOT match port-as-host (because abc isn't \d+),
        // but should fall through to direct-forward.
        let hit = m(&routes, "abc.fbi.com").expect("should match");
        assert_eq!(hit.route_name, "direct-forward");
        assert_eq!(hit.target, "abc:80");
    }

    #[test]
    fn match_host_with_domain_filter_accepts_matching() {
        let routes = default_routes();
        let hit = match_host_with_domain(&routes, "3000.fbi.com", Some("fbi.com"))
            .expect("should match");
        assert_eq!(hit.route_name, "port-as-host");
        assert_eq!(hit.target, "127.0.0.1:3000");
    }

    #[test]
    fn match_host_with_domain_filter_rejects_non_matching() {
        let routes = default_routes();
        let hit = match_host_with_domain(&routes, "evil.example.com", Some("fbi.com"));
        assert!(hit.is_none());
    }

    #[test]
    fn match_host_with_multi_dot_domain() {
        // The default-domain filter (`fbi.example.com`) only validates
        // the suffix. The pattern itself still matches the full host,
        // and {domain} naturally captures multi-segment trailing parts.
        let routes = compile(vec![RouteConfig {
            name: "direct".into(),
            r#match: "{host}.{domain}".into(),
            target: "{host}:80".into(),
            headers: None,
        }])
        .unwrap();
        let hit =
            match_host_with_domain(&routes, "myserver.fbi.example.com", Some("fbi.example.com"))
                .expect("should match");
        assert_eq!(hit.target, "myserver:80");
    }

    #[test]
    fn match_host_with_multi_dot_domain_rejects_wrong_suffix() {
        let routes = compile(vec![RouteConfig {
            name: "direct".into(),
            r#match: "{host}.{domain}".into(),
            target: "{host}:80".into(),
            headers: None,
        }])
        .unwrap();
        let hit = match_host_with_domain(&routes, "myserver.other.com", Some("fbi.example.com"));
        assert!(hit.is_none());
    }

    #[test]
    fn multi_kind_captures_multi_dot_segments() {
        let routes = compile(vec![RouteConfig {
            name: "dns-passthrough".into(),
            r#match: "{upstream:multi}.fbi.com".into(),
            target: "{upstream}:80".into(),
            headers: None,
        }])
        .unwrap();

        let hit = match_host(&routes, "github.com.fbi.com").unwrap();
        assert_eq!(hit.target, "github.com:80");

        let hit = match_host(&routes, "api.example.org.fbi.com").unwrap();
        assert_eq!(hit.target, "api.example.org:80");

        // Single segment still matches (one-or-more).
        let hit = match_host(&routes, "single.fbi.com").unwrap();
        assert_eq!(hit.target, "single:80");
    }

    #[test]
    fn multi_kind_with_host_header_rewrite() {
        let routes = compile(vec![RouteConfig {
            name: "dns-with-host".into(),
            r#match: "{upstream:multi}.fbi.com".into(),
            target: "{upstream}:443".into(),
            headers: Some(HashMap::from([("Host".into(), "{upstream}".into())])),
        }])
        .unwrap();
        let hit = match_host(&routes, "api.example.com.fbi.com").unwrap();
        assert_eq!(hit.target, "api.example.com:443");
        assert_eq!(hit.host_header.as_deref(), Some("api.example.com"));
    }

    #[test]
    fn multi_kind_with_routes_yaml() {
        // `version` is omitted on purpose; it has a serde default.
        let yaml = r#"
routes:
  - name: dns-passthrough
    match: "{upstream:multi}.{domain}"
    target: "{upstream}:80"
"#;
        let parsed = parse_yaml(yaml).unwrap();
        let routes = compile(parsed.routes).unwrap();
        let hit = match_host(&routes, "github.com.fbi.com").unwrap();
        assert_eq!(hit.target, "github.com:80");
    }

    #[test]
    fn slug_kind_accepts_lowercase_and_dashes() {
        let routes = compile(vec![RouteConfig {
            name: "slugged".into(),
            r#match: "{name:slug}.example".into(),
            target: "{name}".into(),
            headers: None,
        }])
        .unwrap();
        assert!(match_host(&routes, "my-service.example").is_some());
        // Uppercase normalized to lowercase by `normalize`, so it matches.
        assert!(match_host(&routes, "MY-SERVICE.example").is_some());
        // Underscores not allowed in slug.
        assert!(match_host(&routes, "my_service.example").is_none());
    }

    #[test]
    fn parse_yaml_default_routes() {
        let yaml = r#"
version: 1
routes:
  - name: port-as-host
    match: "{port:int}.{domain}"
    target: "127.0.0.1:{port}"
  - name: direct-forward
    match: "{host}.{domain}"
    target: "{host}:80"
    headers:
      Host: "{host}"
"#;
        let parsed = parse_yaml(yaml).expect("yaml should parse");
        assert_eq!(parsed.version, 1);
        assert_eq!(parsed.routes.len(), 2);
        assert_eq!(parsed.routes[0].name, "port-as-host");
        assert_eq!(parsed.routes[0].r#match, "{port:int}.{domain}");
        assert_eq!(parsed.routes[1].headers.as_ref().unwrap()["Host"], "{host}");

        let compiled = compile(parsed.routes).unwrap();
        let hit = match_host(&compiled, "3000.fbi.com").unwrap();
        assert_eq!(hit.target, "127.0.0.1:3000");
    }

    #[test]
    fn expand_passes_through_unknown_placeholders_silently() {
        // expand() is internal but exercised here as a sanity check:
        // a template referencing an unknown name returns the template
        // minus the placeholder, i.e. the placeholder is dropped, not
        // copied through. (compile() rejects this, so users can't hit
        // it; this just guards against panics in expand.)
        let mut caps = HashMap::new();
        caps.insert("a".to_string(), "X".to_string());
        assert_eq!(expand("{a}-{b}", &caps), "X-");
    }
}