@chrismo/superkit 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,17 +4,48 @@ Documentation, tutorials, and recipes for [SuperDB](https://superdb.org/).
4
4
 
5
5
  **Website:** [chrismo.github.io/superkit](https://chrismo.github.io/superkit/)
6
6
 
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install -g @chrismo/superkit
11
+ ```
12
+
13
+ ## CLI Tools
14
+
15
+ - `skdoc` — Browse documentation (expert guide, upgrade guide, tutorials)
16
+ - `skgrok` — Search grok patterns
17
+ - `skops` — Browse recipe functions and operators
18
+
19
+ Also available via `npx skdoc`, `npx skgrok`, `npx skops`.
20
+
7
21
  ## Content
8
22
 
9
23
  - **Expert Guide** — Comprehensive SuperSQL syntax reference
10
24
  - **Upgrade Guide** — Migration guide from zq to SuperDB
11
25
  - **Tutorials** — Step-by-step guides for common patterns
26
+ - **Recipes** — Reusable SuperSQL functions and operators
27
+ - **Grok Patterns** — All SuperDB grok patterns
28
+
29
+ ## Library
30
+
31
+ The [SuperDB MCP server](https://github.com/chrismo/superdb-mcp) depends on
32
+ this package for its documentation tools. The TypeScript API is available for
33
+ other integrations:
34
+
35
+ ```typescript
36
+ import { superHelp, superRecipes, superGrokPatterns } from '@chrismo/superkit';
37
+ ```
12
38
 
13
- ## How it works
39
+ ## Upgrading from pre-npm SuperKit
14
40
 
15
- Content is authored in [superdb-mcp](https://github.com/chrismo/superdb-mcp) and auto-synced here via GitHub Action. The site is built with Jekyll using the [Just the Docs](https://just-the-docs.com/) theme and deployed via GitHub Pages.
41
+ If you previously installed SuperKit via the old `install.sh` script, remove
42
+ the legacy files:
16
43
 
17
- `changelog.jsup` is kept for historical reference from the original SuperKit project.
44
+ ```bash
45
+ rm -f ~/.local/bin/sk ~/.local/bin/skdoc ~/.local/bin/skgrok \
46
+ ~/.local/bin/skgrok.jsup ~/.local/bin/skops \
47
+ ~/.local/bin/skops.jsup ~/.local/bin/skops.spq
48
+ ```
18
49
 
19
50
  ## License
20
51
 
@@ -64,3 +64,92 @@ op sk_array_flatten: (
64
64
  | parse_sup(f'[{this}]')
65
65
  )
66
66
  ```
67
+
68
+ ---
69
+
70
+ ## sk_array_append
71
+
72
+ Appends a value to the end of an array.
73
+
74
+ **Type:** function
75
+
76
+ | Argument | Description |
77
+ |----------|-------------|
78
+ | `arr` | The array to append to. |
79
+ | `val` | The value to append. |
80
+
81
+ ```supersql
82
+ sk_array_append([1,2,3], 4)
83
+ -- => [1,2,3,4]
84
+
85
+ sk_array_append([], "a")
86
+ -- => ["a"]
87
+ ```
88
+
89
+ **Implementation:**
90
+
91
+ ```supersql
92
+ fn sk_array_append(arr, val): ([...arr, val])
93
+ ```
94
+
95
+ ---
96
+
97
+ ## sk_array_remove
98
+
99
+ Removes all occurrences of a value from an array.
100
+
101
+ **Type:** operator
102
+
103
+ | Argument | Description |
104
+ |----------|-------------|
105
+ | `val` | The value to remove. |
106
+
107
+ ```supersql
108
+ [1,2,3,2,1] | sk_array_remove 2
109
+ -- => [1,3,1]
110
+
111
+ ["a","b","c"] | sk_array_remove "b"
112
+ -- => ["a","c"]
113
+ ```
114
+
115
+ **Implementation:**
116
+
117
+ ```supersql
118
+ op sk_array_remove val: (
119
+ [unnest this | where this != val]
120
+ )
121
+ ```
122
+
123
+ ---
124
+
125
+ ## sk_deep_flatten
126
+
127
+ Recursively flattens nested arrays into a single flat array.
128
+
129
+ Unlike `sk_array_flatten` which only flattens one level, `sk_deep_flatten` recursively processes all nested arrays regardless of depth.
130
+
131
+ **Type:** operator
132
+
133
+ ```supersql
134
+ [[1,[2,3]],[4,[5,[6]]]] | sk_deep_flatten
135
+ -- => [1,2,3,4,5,6]
136
+
137
+ [1,[2],[[3]]] | sk_deep_flatten
138
+ -- => [1,2,3]
139
+ ```
140
+
141
+ **Implementation:**
142
+
143
+ ```supersql
144
+ op sk_deep_flatten: (
145
+ fn _df(v): (
146
+ case kind(v)
147
+ when "array" then (
148
+ [unnest [unnest v | _df(this)] | unnest this]
149
+ )
150
+ else [v]
151
+ end
152
+ )
153
+ _df(this)
154
+ )
155
+ ```
@@ -29,3 +29,52 @@ op sk_array_flatten: (
29
29
  | replace(this, ']','')
30
30
  | parse_sup(f'[{this}]')
31
31
  )
32
+
33
+ fn skdoc_array_append(): (
34
+ cast(
35
+ {name:"sk_array_append",
36
+ type:"func",
37
+ desc:"Appends a value to the end of an array.",
38
+ args:[{name:"arr",desc:"The array to append to."}
39
+ {name:"val",desc:"The value to append."}],
40
+ examples:[{i:"sk_array_append([1,2,3], 4)",o:"[1,2,3,4]"}
41
+ {i:"sk_array_append([], \"a\")",o:"[\"a\"]"}]}, <skdoc>)
42
+ )
43
+
44
+ fn sk_array_append(arr, val): ([...arr, val])
45
+
46
+ fn skdoc_array_remove(): (
47
+ cast(
48
+ {name:"sk_array_remove",
49
+ type:"op",
50
+ desc:"Removes all occurrences of a value from an array.",
51
+ args:[{name:"val",desc:"The value to remove."}],
52
+ examples:[{i:"[1,2,3,2,1] | sk_array_remove 2",o:"[1,3,1]"}
53
+ {i:"[\"a\",\"b\",\"c\"] | sk_array_remove \"b\"",o:"[\"a\",\"c\"]"}]}, <skdoc>)
54
+ )
55
+
56
+ op sk_array_remove val: (
57
+ [unnest this | where this != val]
58
+ )
59
+
60
+ fn skdoc_array_deep_flatten(): (
61
+ cast(
62
+ {name:"sk_deep_flatten",
63
+ type:"op",
64
+ desc:"Recursively flattens nested arrays into a single flat array.",
65
+ args:[],
66
+ examples:[{i:"[[1,[2,3]],[4,[5,[6]]]] | sk_deep_flatten",o:"[1,2,3,4,5,6]"}
67
+ {i:"[1,[2],[[3]]] | sk_deep_flatten",o:"[1,2,3]"}]}, <skdoc>)
68
+ )
69
+
70
+ op sk_deep_flatten: (
71
+ fn _df(v): (
72
+ case kind(v)
73
+ when "array" then (
74
+ [unnest [unnest v | _df(this)] | unnest this]
75
+ )
76
+ else [v]
77
+ end
78
+ )
79
+ _df(this)
80
+ )
@@ -49,3 +49,43 @@ fn sk_format_bytes(value): (
49
49
  (value == 0) ? "0 B" : _sk_format_nonzero_bytes(value)
50
50
  )
51
51
  ```
52
+
53
+ ---
54
+
55
+ ## sk_format_epoch
56
+
57
+ Converts Unix epoch milliseconds to a time value with timezone offset applied.
58
+
59
+ **Type:** function
60
+
61
+ | Argument | Description |
62
+ |----------|-------------|
63
+ | `epoch_ms` | Milliseconds since 1970-01-01 00:00:00 UTC. |
64
+ | `tz_offset` | Timezone offset string like '-0500' or '+0530'. |
65
+
66
+ ```supersql
67
+ sk_format_epoch(0, '+0000')
68
+ -- => 1970-01-01T00:00:00Z
69
+
70
+ sk_format_epoch(1704067200000, '-0500')
71
+ -- => 2023-12-31T19:00:00Z
72
+ ```
73
+
74
+ **Note:** SuperDB has no timezone-aware time type. The returned time value
75
+ displays as UTC but represents the local time with the offset already applied.
76
+ For display purposes only — do not use the result in further time arithmetic
77
+ that assumes UTC.
78
+
79
+ **Implementation:**
80
+
81
+ ```supersql
82
+ fn sk_format_epoch(epoch_ms, tz_offset): (
83
+ {
84
+ sign: tz_offset[0:1],
85
+ hours: tz_offset[1:3]::int64,
86
+ mins: tz_offset[3:5]::int64,
87
+ base_time: (epoch_ms * 1000000)::time
88
+ }
89
+ | this.base_time + f'{this.sign == "-" ? "-" : ""}{this.hours}h{this.mins > 0 ? f"{this.mins}m" : ""}'::duration
90
+ )
91
+ ```
@@ -22,3 +22,24 @@ fn skdoc_format_bytes(): (
22
22
  fn sk_format_bytes(value): (
23
23
  (value == 0) ? "0 B" : _sk_format_nonzero_bytes(value)
24
24
  )
25
+
26
+ fn skdoc_format_epoch(): (
27
+ cast(
28
+ {name:"sk_format_epoch",
29
+ type:"func",
30
+ desc:"Converts Unix epoch milliseconds to a time value with timezone offset applied.",
31
+ args:[{name:"epoch_ms",desc:"Milliseconds since 1970-01-01 00:00:00 UTC."}
32
+ {name:"tz_offset",desc:"Timezone offset string like '-0500' or '+0530'."}],
33
+ examples:[{i:"sk_format_epoch(0, '+0000')",o:"1970-01-01T00:00:00Z"}
34
+ {i:"sk_format_epoch(1704067200000, '-0500')",o:"2023-12-31T19:00:00Z"}]}, <skdoc>)
35
+ )
36
+
37
+ fn sk_format_epoch(epoch_ms, tz_offset): (
38
+ {
39
+ sign: tz_offset[0:1],
40
+ hours: tz_offset[1:3]::int64,
41
+ mins: tz_offset[3:5]::int64,
42
+ base_time: (epoch_ms * 1000000)::time
43
+ }
44
+ | this.base_time + f'{this.sign == "-" ? "-" : ""}{this.hours}h{this.mins > 0 ? f"{this.mins}m" : ""}'::duration
45
+ )
@@ -99,3 +99,45 @@ fn sk_max(a, b): (
99
99
  a > b ? a : b
100
100
  )
101
101
  ```
102
+
103
+ ---
104
+
105
+ ## sk_last_day_of_month
106
+
107
+ Returns the last day number (28-31) of the given month and year. Correctly handles leap years.
108
+
109
+ **Type:** function
110
+
111
+ | Argument | Description |
112
+ |----------|-------------|
113
+ | `year` | The year (e.g. 2024). |
114
+ | `month` | The month number (1-12). |
115
+
116
+ ```supersql
117
+ sk_last_day_of_month(2024, 2)
118
+ -- => 29
119
+
120
+ sk_last_day_of_month(2023, 2)
121
+ -- => 28
122
+
123
+ sk_last_day_of_month(2024, 12)
124
+ -- => 31
125
+
126
+ sk_last_day_of_month(2024, 4)
127
+ -- => 30
128
+ ```
129
+
130
+ Works by constructing the first day of the next month as a time value, subtracting one day, then extracting the day number from the resulting date string.
131
+
132
+ **Implementation:**
133
+
134
+ ```supersql
135
+ fn sk_last_day_of_month(year, month): (
136
+ -- Returns the last day number of the given month
137
+ {
138
+ nm: month == 12 ? 1 : month + 1,
139
+ ny: month == 12 ? year + 1 : year
140
+ }
141
+ | ((f'{this.ny}-{this.nm > 9 ? "" : "0"}{this.nm}-01T00:00:00Z'::time - 1d)::string)[8:10]::uint8
142
+ )
143
+ ```
@@ -51,3 +51,25 @@ fn skdoc_max(): (
51
51
  fn sk_max(a, b): (
52
52
  a > b ? a : b
53
53
  )
54
+
55
+ fn skdoc_last_day_of_month(): (
56
+ cast(
57
+ {name:"sk_last_day_of_month",
58
+ type:"func",
59
+ desc:"Returns the last day number (28-31) of the given month and year.",
60
+ args:[{name:"year",desc:"The year (e.g. 2024)."}
61
+ {name:"month",desc:"The month number (1-12)."}],
62
+ examples:[{i:"sk_last_day_of_month(2024, 2)",o:"29"}
63
+ {i:"sk_last_day_of_month(2023, 2)",o:"28"}
64
+ {i:"sk_last_day_of_month(2024, 12)",o:"31"}
65
+ {i:"sk_last_day_of_month(2024, 4)",o:"30"}]}, <skdoc>)
66
+ )
67
+
68
+ fn sk_last_day_of_month(year, month): (
69
+ -- Returns the last day number of the given month
70
+ {
71
+ nm: month == 12 ? 1 : month + 1,
72
+ ny: month == 12 ? year + 1 : year
73
+ }
74
+ | ((f'{this.ny}-{this.nm > 9 ? "" : "0"}{this.nm}-01T00:00:00Z'::time - 1d)::string)[8:10]::uint8
75
+ )
@@ -142,6 +142,79 @@ fn sk_pad_right(s, pad_char, target_length): (
142
142
 
143
143
  ---
144
144
 
145
+ ## sk_left
146
+
147
+ Returns the first n characters of a string.
148
+
149
+ **Type:** function
150
+
151
+ | Argument | Description |
152
+ |----------|-------------|
153
+ | `s` | The string. |
154
+ | `n` | Number of characters from the left. |
155
+
156
+ ```supersql
157
+ sk_left('hello', 3)
158
+ -- => 'hel'
159
+ ```
160
+
161
+ **Implementation:**
162
+
163
+ ```supersql
164
+ fn sk_left(s, n): (sk_slice(s, 0, sk_clamp(n, 0, len(s))))
165
+ ```
166
+
167
+ ---
168
+
169
+ ## sk_right
170
+
171
+ Returns the last n characters of a string.
172
+
173
+ **Type:** function
174
+
175
+ | Argument | Description |
176
+ |----------|-------------|
177
+ | `s` | The string. |
178
+ | `n` | Number of characters from the right. |
179
+
180
+ ```supersql
181
+ sk_right('hello', 3)
182
+ -- => 'llo'
183
+ ```
184
+
185
+ **Implementation:**
186
+
187
+ ```supersql
188
+ fn sk_right(s, n): (sk_slice(s, len(s) - sk_clamp(n, 0, len(s)), len(s)))
189
+ ```
190
+
191
+ ---
192
+
193
+ ## sk_mid
194
+
195
+ Returns n characters from a string starting at a given position.
196
+
197
+ **Type:** function
198
+
199
+ | Argument | Description |
200
+ |----------|-------------|
201
+ | `s` | The string. |
202
+ | `start` | Starting index, zero-based. |
203
+ | `n` | Number of characters to return. |
204
+
205
+ ```supersql
206
+ sk_mid('hello world', 6, 5)
207
+ -- => 'world'
208
+ ```
209
+
210
+ **Implementation:**
211
+
212
+ ```supersql
213
+ fn sk_mid(s, start, n): (sk_slice(s, sk_clamp(start, 0, len(s)), sk_clamp(start, 0, len(s)) + sk_clamp(n, 0, len(s))))
214
+ ```
215
+
216
+ ---
217
+
145
218
  ## sk_urldecode
146
219
 
147
220
  URL decoder for SuperDB. Splits on `%`, decodes each hex-encoded segment, and joins back together.
@@ -77,6 +77,43 @@ fn sk_pad_right(s, pad_char, target_length): (
77
77
  len(s) < target_length ? sk_pad_right(f'{s}{pad_char}', pad_char, target_length) : s
78
78
  )
79
79
 
80
+ fn skdoc_left(): (
81
+ cast(
82
+ {name:"sk_left",
83
+ type:"func",
84
+ desc:"Returns the first n characters of a string.",
85
+ args:[{name:"s",desc:"The string."}
86
+ {name:"n",desc:"Number of characters from the left."}],
87
+ examples:[{i:"sk_left('hello', 3)",o:"'hel'"}]}, <skdoc>)
88
+ )
89
+
90
+ fn sk_left(s, n): (sk_slice(s, 0, sk_clamp(n, 0, len(s))))
91
+
92
+ fn skdoc_right(): (
93
+ cast(
94
+ {name:"sk_right",
95
+ type:"func",
96
+ desc:"Returns the last n characters of a string.",
97
+ args:[{name:"s",desc:"The string."}
98
+ {name:"n",desc:"Number of characters from the right."}],
99
+ examples:[{i:"sk_right('hello', 3)",o:"'llo'"}]}, <skdoc>)
100
+ )
101
+
102
+ fn sk_right(s, n): (sk_slice(s, len(s) - sk_clamp(n, 0, len(s)), len(s)))
103
+
104
+ fn skdoc_mid(): (
105
+ cast(
106
+ {name:"sk_mid",
107
+ type:"func",
108
+ desc:"Returns n characters from a string starting at a given position.",
109
+ args:[{name:"s",desc:"The string."}
110
+ {name:"start",desc:"Starting index, zero-based."}
111
+ {name:"n",desc:"Number of characters to return."}],
112
+ examples:[{i:"sk_mid('hello world', 6, 5)",o:"'world'"}]}, <skdoc>)
113
+ )
114
+
115
+ fn sk_mid(s, start, n): (sk_slice(s, sk_clamp(start, 0, len(s)), sk_clamp(start, 0, len(s)) + sk_clamp(n, 0, len(s))))
116
+
80
117
  -- TODO: skdoc_urldecode
81
118
 
82
119
  -- URL Decoder for SuperDB
@@ -383,7 +383,7 @@ echo '{id:1,person_id:1,exercise:"tango"}
383
383
  {id:4,person_id:2,exercise:"cooking"}' > exercises.sup
384
384
 
385
385
  # joins supported: left, right, inner, full outer, anti
386
- super -c "
386
+ super -s -c "
387
387
  select * from people.json people
388
388
  join exercises.sup exercises
389
389
  on people.id=exercises.person_id
@@ -391,7 +391,7 @@ super -c "
391
391
 
392
392
  # where ... is null not supported yet
393
393
  # unless coalesce used in the select clause
394
- super -c "
394
+ super -s -c "
395
395
  select * from people.json people
396
396
  left join exercises.sup exercises
397
397
  on people.id=exercises.person_id
@@ -423,6 +423,159 @@ _current_tasks "| where done==true" | super -s -c "count()" -
423
423
  _current_tasks | super -s -c "where done==true | count()" -
424
424
  ```
425
425
 
426
+ ## Advanced Patterns
427
+
428
+ ### Finding Syntax Errors in .sup Files
429
+
430
+ Read each line as a raw string and test it individually with `parse_sup()`.
431
+ The first error reported is the real problem:
432
+
433
+ ```
434
+ super -i line -j -c '
435
+ values {raw: this, parsed: parse_sup(this)}
436
+ | where is_error(parsed)
437
+ | cut raw
438
+ ' broken-file.sup
439
+ ```
440
+
441
+ ### Crosstab Pattern
442
+
443
+ SQL crosstab using CASE/WHEN to pivot rows into columns:
444
+
445
+ ```sql
446
+ SELECT
447
+ coalesce(category, 'Total') as _,
448
+ SUM(CASE WHEN win = true THEN count ELSE 0 END) AS win,
449
+ SUM(CASE WHEN win = false THEN count ELSE 0 END) AS loss
450
+ GROUP BY _
451
+ ```
452
+
453
+ ### Fork and Join for Inline Data
454
+
455
+ Use `fork` with inline structured data to split and rejoin streams:
456
+
457
+ ```
458
+ values {
459
+ data:[{id:1,s:'a'},{id:2,s:'b'},{id:3,s:'c'}],
460
+ match:[{id:2},{id:3}]
461
+ }
462
+ | fork
463
+ ( unnest data )
464
+ ( unnest match )
465
+ | inner join on left.id=right.id
466
+ ```
467
+
468
+ See the [Subqueries tutorial](tutorials/subqueries) for fork-and-join as a
469
+ streamable alternative to `collect`-based correlated subqueries, and
470
+ [Moar Subqueries](tutorials/moar_subqueries) for the collect-first "go up
471
+ before drilling down" pattern.
472
+
473
+ ### Aggregate Filters
474
+
475
+ Use `filter (expr)` on aggregate functions for conditional aggregation.
476
+ Non-matches produce a count of 0 instead of empty output:
477
+
478
+ ```
479
+ -- Count of 0 for non-matches:
480
+ values 1, 2 | count() filter (this == 3)
481
+ -- => 0
482
+
483
+ -- Conditional collection:
484
+ unnest [{dir:"out",v:"90"},{dir:"in",v:"561"},{dir:"in",v:"306"}]
485
+ | in_vals:=collect(v) filter (dir=="in"),
486
+ out_vals:=collect(v) filter (dir=="out")
487
+ ```
488
+
489
+ ### Record vs Map Types
490
+
491
+ Key distinction between Records and Maps:
492
+
493
+ - `{a:1}` is a Record (unquoted keys)
494
+ - `|{"a":1}|` is a Map (literal primitive keys, pipe delimiters)
495
+ - `put` and spread only work with Records, not Maps or Unions
496
+ - Map keys must be literal primitive types
497
+ - `collect_map` requires a Map Expression argument — use `|{key:val}|` syntax,
498
+ not a Record expression
499
+
500
+ ### Converting Map to Record
501
+
502
+ Maps and Records are separate types. To convert a `collect_map` result to a
503
+ Record for use with `put`/spread, strip the pipe delimiters and re-parse:
504
+
505
+ ```
506
+ -- collect_map produces a Map:
507
+ values {k:"a",v:1}, {k:"b",v:2} | collect_map(|{k:v}|)
508
+ -- => |{"a":1,"b":2}|
509
+
510
+ -- Convert Map to Record:
511
+ values {k:"a",v:1}, {k:"b",v:2}
512
+ | collect_map(|{k:v}|)
513
+ | this::string | this[1:-1] | parse_sup(this)
514
+ -- => {a:1,b:2}
515
+ ```
516
+
517
+ ### Which Builtins Need Explicit `this`
518
+
519
+ Most aggregate functions need `this` passed in explicitly:
520
+
521
+ - **Implicit** (no argument): `count()`
522
+ - **Explicit**: `and(this)`, `any(this)`, `avg(this)`, `collect(this)`,
523
+ `dcount(this)`, `fuse(this)`, `max(this)`, `min(this)`, `or(this)`,
524
+ `sum(this)`, `union(this)`
525
+ - **Oddball**: `collect_map(|{key:val}|)` — needs a Map Expression, not `this`
526
+
527
+ Functions that changed in 0.1.0 to require explicit `this`:
528
+ - `grep('pattern', this)` (was `grep(/pattern/)`)
529
+ - `is(this, <type>)` (was `is(<type>)`)
530
+ - `nest_dotted(this)` (was `nest_dotted()`)
531
+
532
+ ### Expressions Inside `put`
533
+
534
+ Pipelines work inside `put` expressions — no lateral subquery hack needed:
535
+
536
+ ```
537
+ values {arn:"arn:aws:kms:us-east-1:000000000000:key/abc123"}
538
+ | put region:=(split(this.arn, ':') | this[3])
539
+ -- => {arn:"arn:aws:kms:us-east-1:000000000000:key/abc123",region:"us-east-1"}
540
+ ```
541
+
542
+ Direct indexing also works: `put region:=split(this.arn, ':')[3]`
543
+
544
+ ### String Slicing
545
+
546
+ SuperDB uses exclusive end index (like Python):
547
+
548
+ ```
549
+ "aoeusnth"[0:-1]
550
+ -- => "aoeusnt" (last char excluded)
551
+ "aoeusnth"[0:]
552
+ -- => "aoeusnth" (full string)
553
+ ```
554
+
555
+ ### search vs where for Regex
556
+
557
+ - `search 'pattern'` — search all fields
558
+ - `where grep('pattern', this)` — filter with regex in where clause
559
+ - No `=~` operator exists in SuperDB
560
+
561
+ ### Deep Walk (Recursive Transformation)
562
+
563
+ A recursive function that walks nested structures, applying a
564
+ transformation at every leaf:
565
+
566
+ ```
567
+ fn walk(v):
568
+ case kind(v)
569
+ when "array" then
570
+ [unnest v | walk(this)]
571
+ when "record" then
572
+ unflatten([unnest flatten(v) | {key,value:walk(value)}])
573
+ else v+1
574
+ end
575
+ values walk([{x:[1,2]},{y:3}])
576
+ -- => [{x:[2,3]},{y:4}]
577
+ ```
578
+
426
579
  ## Advanced SuperDB Features
427
580
 
428
581
  ### Type System
@@ -548,8 +701,12 @@ super -s -c "{a:{c:1}, b:{d:'foo'}} | {...a, ...b}" # => {c:1, d:'foo'}
548
701
 
549
702
  - Check for a trailing `-` without stdin
550
703
  - Check for no trailing `-` with stdin (sometimes you get output anyway but this is usually wrong!)
704
+ - Watch for trailing `-` inside bash loops — `while IFS= read -r line` provides
705
+ stdin, so a `super -c "..." -` inside the loop will consume it instead of the
706
+ pipe. Drop the `-` if the command doesn't need stdin input.
551
707
  - Verify field names match exactly (case-sensitive)
552
708
  - Check type mismatches in comparisons
709
+ - `collect()` on empty stream returns `null` (not empty) — guard with `coalesce(result, [])`
553
710
 
554
711
  2. **Type Errors**
555
712
 
@@ -654,13 +811,13 @@ Converting numeric values (like milliseconds) to duration types uses f-string in
654
811
 
655
812
  ```bash
656
813
  # Convert milliseconds to duration
657
- super -c "values 993958 | values f'{this}ms'::duration"
814
+ super -s -c "values 993958 | values f'{this}ms'::duration"
658
815
 
659
816
  # Convert to seconds first, then duration
660
- super -c "values 993958 / 1000 | values f'{this}s'::duration"
817
+ super -s -c "values 993958 / 1000 | values f'{this}s'::duration"
661
818
 
662
819
  # Round duration to buckets (e.g., 15 minute chunks)
663
- super -c "values 993958 / 1000 | values f'{this}s'::duration | bucket(this, 15m)"
820
+ super -s -c "values 993958 / 1000 | values f'{this}s'::duration | bucket(this, 15m)"
664
821
  ```
665
822
 
666
823
  **Key points:**
@@ -680,16 +837,16 @@ SuperDB uses `::type` syntax for type conversions (not function calls):
680
837
 
681
838
  ```bash
682
839
  # Integer conversion (truncates decimals)
683
- super -c "values 1234.56::int64" # outputs: 1234
840
+ super -s -c "values 1234.56::int64" # outputs: 1234
684
841
 
685
842
  # String conversion
686
- super -c "values 42::string" # outputs: "42"
843
+ super -s -c "values 42::string" # outputs: "42"
687
844
 
688
845
  # Float conversion
689
- super -c "values 100::float64" # outputs: 100.0
846
+ super -s -c "values 100::float64" # outputs: 100.0
690
847
 
691
848
  # Chaining casts
692
- super -c "values (123.45::int64)::string" # outputs: "123"
849
+ super -s -c "values (123.45::int64)::string" # outputs: "123"
693
850
  ```
694
851
 
695
852
  **Important:**
@@ -729,13 +886,13 @@ SuperDB has a `round()` function that rounds to the nearest integer:
729
886
 
730
887
  ```bash
731
888
  # Round to nearest integer (single argument only)
732
- super -c "values round(3.14)" # outputs: 3.0
733
- super -c "values round(-1.5)" # outputs: -2.0
734
- super -c "values round(1234.567)" # outputs: 1235.0
889
+ super -s -c "values round(3.14)" # outputs: 3.0
890
+ super -s -c "values round(-1.5)" # outputs: -2.0
891
+ super -s -c "values round(1234.567)" # outputs: 1235.0
735
892
 
736
893
  # For rounding to specific decimal places, use the multiply-cast-divide pattern
737
- super -c "values ((1234.567 * 100)::int64 / 100.0)" # outputs: 1234.56 (2 decimals)
738
- super -c "values ((1234.567 * 10)::int64 / 10.0)" # outputs: 1234.5 (1 decimal)
894
+ super -s -c "values ((1234.567 * 100)::int64 / 100.0)" # outputs: 1234.56 (2 decimals)
895
+ super -s -c "values ((1234.567 * 10)::int64 / 10.0)" # outputs: 1234.5 (1 decimal)
739
896
  ```
740
897
 
741
898
  **Key points:**
@@ -11,7 +11,7 @@ last_updated: "2026-02-17"
11
11
 
12
12
  # Getting Bash Text into SuperDB
13
13
 
14
- The companion to [sup_to_bash](sup_to_bash.md), this covers the reverse: safely
14
+ The companion to [sup_to_bash]({% link docs/tutorials/sup_to_bash.md %}), this covers the reverse: safely
15
15
  getting raw text from Bash into SuperDB.
16
16
 
17
17
  ## The Problem
@@ -0,0 +1,73 @@
1
+ ---
2
+ title: "Cloudflare Log Durations"
3
+ name: cloudflare-durations
4
+ description: "Parsing Cloudflare edge timestamps, computing request durations, and bucketing for analysis."
5
+ layout: default
6
+ nav_order: 12
7
+ parent: Tutorials
8
+ superdb_version: "0.3.0"
9
+ last_updated: "2026-04-05"
10
+ ---
11
+
12
+ # Cloudflare Log Durations
13
+
14
+ *Narrative tutorial — examples reference external Cloudflare log data.*
15
+
16
+ Many Cloudflare log entries include edge timestamps like `@EdgeStartTimestamp`
17
+ and `@EdgeEndTimestamp`. Computing request durations from these is a common
18
+ analysis task — and a good example of SuperDB's string cleaning, time parsing,
19
+ and bucketing capabilities.
20
+
21
+ ## The Problem
22
+
23
+ Cloudflare timestamps often arrive with extra escaping:
24
+
25
+ ```
26
+ "@EdgeStartTimestamp":"\"2025-04-22T18:16:46Z\""
27
+ ```
28
+
29
+ We need to strip the escaped quotes, parse as time values, compute durations,
30
+ and then analyze the distribution.
31
+
32
+ ## Step 1: Clean and Compute Durations
33
+
34
+ ```bash
35
+ super -s -c "
36
+ drop Message, Service, Env
37
+ | start := regexp_replace(this['@EdgeStartTimestamp'], '[^A-Z0-9-:]', ''),
38
+ end := regexp_replace(this['@EdgeEndTimestamp'], '[^A-Z0-9-:]', '')
39
+ | start := start::time, end := end::time
40
+ | dur := end - start
41
+ | cut start, end, dur
42
+ " cloudflare-extract.csv > cf-durations.sup
43
+ ```
44
+
45
+ Key techniques:
46
+ - `regexp_replace` strips everything except alphanumerics, hyphens, and colons
47
+ - `::time` casts the cleaned strings to time values
48
+ - Duration is simply `end - start` — SuperDB handles time arithmetic natively
49
+
50
+ ## Step 2: Bucket and Analyze
51
+
52
+ ```bash
53
+ super -s -c "
54
+ log_count := collect(this) by bucket(dur, 3s)
55
+ | log_count := len(log_count)
56
+ | sort bucket
57
+ " cf-durations.sup
58
+ ```
59
+
60
+ This groups requests into 3-second duration buckets and counts how many fall
61
+ into each, giving a histogram of request latencies.
62
+
63
+ ## Variations
64
+
65
+ Adjust the bucket size for different granularity:
66
+
67
+ ```bash
68
+ # Fine-grained: 500ms buckets
69
+ super -s -c "count() by bucket(dur, 500ms) | sort bucket" cf-durations.sup
70
+
71
+ # Coarse: 30s buckets
72
+ super -s -c "count() by bucket(dur, 30s) | sort bucket" cf-durations.sup
73
+ ```
@@ -1,16 +1,50 @@
1
1
  ---
2
2
  title: "Moar Subqueries"
3
3
  name: moar-subqueries
4
- description: "Additional subquery patterns including fork and full sub-selects."
4
+ description: "Additional subquery patterns including collect-first, fork, and full sub-selects."
5
5
  layout: default
6
6
  nav_order: 10
7
7
  parent: Tutorials
8
- superdb_version: "0.2.0"
9
- last_updated: "2026-02-15"
8
+ superdb_version: "0.3.0"
9
+ last_updated: "2026-04-05"
10
10
  ---
11
11
 
12
12
  # Moar Subqueries
13
13
 
14
+ ## Collect-First Pattern ("Go Up Before Drilling Down")
15
+
16
+ A common problem: you need to both aggregate the full dataset AND filter it
17
+ based on those aggregation results. But SuperDB streams data — once it's
18
+ consumed, it's gone.
19
+
20
+ The collect-first pattern solves this by buffering everything into a single
21
+ record, then using lateral subqueries to derive summaries while keeping access
22
+ to all the data:
23
+
24
+ ```
25
+ from data.json
26
+ | collect(this) | {data: this}
27
+ | put top_ten := [
28
+ unnest data
29
+ | aggregate count := count() by table
30
+ | sort -r count
31
+ | head 10
32
+ | values table
33
+ ]
34
+ | unnest data
35
+ | where table in top_ten
36
+ | aggregate count := count() by table, bucket(ts, 1h)
37
+ | sort table, bucket
38
+ ```
39
+
40
+ The idea: collect everything first ("go up"), derive what you need (top ten
41
+ tables), then drill back down into the raw data using those results as a filter.
42
+
43
+ **Tradeoff:** This buffers the entire dataset into memory. For large datasets,
44
+ consider the fork-and-join approach from
45
+ [Subqueries]({% link docs/tutorials/subqueries.md %}) instead, which stays
46
+ streamable.
47
+
14
48
  ## Fork
15
49
 
16
50
  One hassle to this approach is the limit of 2 forks. Nesting forks works, but
@@ -18,8 +52,8 @@ makes constructing this query a bit more difficult.
18
52
 
19
53
  ## Full Sub-Selects
20
54
 
21
- As of 20250815 build, this is much, much slower. I'm guessing it's doing a full
22
- reload of the data file each time.
55
+ Much slower than pipe-style subqueries because the data file gets re-read each
56
+ time.
23
57
 
24
58
  ```
25
59
  select
@@ -17,8 +17,6 @@ superdb.
17
17
 
18
18
  ## Correlated Subqueries
19
19
 
20
- [//]: # (TODO: file versions - phil's versions from Slack - NOT versions - issue #54)
21
-
22
20
  Let's start with this simple dataset:
23
21
 
24
22
  ```json lines
@@ -129,6 +127,57 @@ super -s -c '
129
127
  {id:4,date:"2025-02-28",foo:9}
130
128
  ```
131
129
 
130
+ ### Fork-and-Join: A Streamable Alternative
131
+
132
+ The lateral subquery approach above uses `collect` to buffer the entire input
133
+ into a single value before iterating. This works well for small datasets, but
134
+ `collect` has limits on how large a single value can be. For larger datasets,
135
+ a fork-and-join approach avoids that limitation by keeping things streamable.
136
+
137
+ The idea is a self-join: raw data on one side, aggregated data on the other,
138
+ joined on the matching fields.
139
+
140
+ ```mdtest-command
141
+ super -s -c '
142
+ from data.json
143
+ | inner join (
144
+ from data.json
145
+ | foo := max(foo) by date
146
+ ) on {left.date, left.foo}={right.date, right.foo}
147
+ | values left
148
+ | sort date'
149
+ ```
150
+ ```mdtest-output
151
+ {id:1,date:"2025-02-27",foo:3}
152
+ {id:4,date:"2025-02-28",foo:9}
153
+ ```
154
+
155
+ This can also use `fork` to read the input once instead of naming the file
156
+ twice:
157
+
158
+ ```mdtest-command
159
+ super -s -c '
160
+ from data.json
161
+ | fork
162
+ ( pass )
163
+ ( foo := max(foo) by date )
164
+ | inner join on {left.date, left.foo}={right.date, right.foo}
165
+ | values left
166
+ | sort date'
167
+ ```
168
+ ```mdtest-output
169
+ {id:1,date:"2025-02-27",foo:3}
170
+ {id:4,date:"2025-02-28",foo:9}
171
+ ```
172
+
173
+ With `fork`, the data flows through a single unnamed input — one branch
174
+ passes records through, the other aggregates. The multi-field join key uses
175
+ the `{left.x, left.y}={right.x, right.y}` record syntax (see
176
+ [multi-value joins](../join/#multi-value-joins)).
177
+
178
+ The tradeoff: fork-and-join is more verbose, but it avoids the `collect`
179
+ size limit and works with streaming pipelines.
180
+
132
181
  ## Subquery with Related Data Join
133
182
 
134
183
  A more realistic scenario: find the records with the top `score` per date, and
@@ -49,6 +49,7 @@ This table covers ALL breaking changes. Complex items reference detailed section
49
49
  | count type | returns `uint64` | returns `int64` |
50
50
  | Dynamic from | `from pool` | `from f'{pool}'` (see section) |
51
51
  | BSUP format | BSUP v1 | BSUP v2 (v1 no longer readable) |
52
+ | collect (empty) | no output on empty stream | returns `null` (see section) |
52
53
  | collect/union | preserves all errors | drops `error("quiet")` values |
53
54
  | concat/f-strings | errors propagate | `null` values ignored |
54
55
 
@@ -456,6 +457,28 @@ super-0.2.0 -s data.bsup > data.sup
456
457
  super -f bsup data.sup > data-v2.bsup
457
458
  ```
458
459
 
460
+ ### collect on empty stream returns null
461
+
462
+ In 0.1.0+, `collect()` on an empty stream returns `null` instead of producing
463
+ no output. This can cause subtle downstream bugs — `this in null` drops all
464
+ records instead of preserving them:
465
+
466
+ ```
467
+ -- Empty collect returns null:
468
+ values [1,2,3] | unnest this | where false | collect(this)
469
+ -- Returns: null
470
+
471
+ -- Downstream gotcha: "not in null" filters out everything:
472
+ values ["a","b","c"] | unnest this | where not (this in null) | collect(this)
473
+ -- Returns: null (all records dropped!)
474
+
475
+ -- Guard with coalesce or check for empty array:
476
+ values ["a","b","c"] | unnest this
477
+ | where not (this in coalesce(null, []))
478
+ | collect(this)
479
+ -- Returns: ["a","b","c"]
480
+ ```
481
+
459
482
  ### collect and union drop quiet errors
460
483
 
461
484
  In `collect` and `union` aggregate functions, `error("quiet")` values are now
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chrismo/superkit",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "SuperDB toolkit — docs, recipes, grok patterns, and CLI tools for the super binary",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -43,4 +43,4 @@
43
43
  "engines": {
44
44
  "node": ">=18.0.0"
45
45
  }
46
- }
46
+ }