@hamelin.sh/compiler 0.3.5 → 0.3.7

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/main.d.ts CHANGED
@@ -1,49 +1,25 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
- type BuildCatalogError = { kind: "catalogInit"; message: string } | { kind: "compilation"; name: string; errors: ContextualTranslationErrors } | { kind: "datasetParse"; name: string; message: string };
4
-
5
- type CompileQueryResult = { Ok: QueryTranslation } | { Err: ContextualTranslationErrors };
6
-
7
- interface CatalogResource {
8
- name: string;
9
- query: string;
10
- }
11
-
12
3
  interface BuildCatalogOutput {
13
4
  catalog: Catalog;
14
5
  errors: BuildCatalogError[];
15
6
  }
16
7
 
17
- type QueryDatasetsResult = { Ok: string[] } | { Err: ContextualTranslationErrors };
18
-
19
- type Level = "Error" | "Warning" | "Info";
20
-
21
- interface ContextualTranslationErrors {
22
- hamelin: string;
23
- errors: ContextualTranslationError[];
24
- }
25
-
26
- interface TranslationError {
27
- area: LanguageArea | undefined;
28
- stage: Stage;
29
- level: Level;
30
- primary: Context;
31
- supporting: Context[] | undefined;
32
- source_desc: string | undefined;
8
+ interface CatalogResource {
9
+ name: string;
10
+ query: string;
11
+ isUnion: boolean;
33
12
  }
34
13
 
35
- type LanguageArea = "FunctionCall" | "Operator" | "Deref" | "IndexAccess" | "Parsing";
36
-
37
- interface ContextualTranslationError {
38
- error: TranslationError;
39
- pretty: string;
14
+ interface Column {
15
+ name: string;
16
+ type: HamelinType;
40
17
  }
41
18
 
42
- type Stage = "Translation" | "Parsing" | "SemanticAnalysis";
43
-
44
- interface Context {
45
- interval: { start: number; end: number };
46
- message: string;
19
+ interface Completion {
20
+ at: { start: number; end: number };
21
+ filter: boolean | undefined;
22
+ items: CompletionItem[];
47
23
  }
48
24
 
49
25
  interface CompletionItem {
@@ -74,8 +50,14 @@ interface CompletionItem {
74
50
  section: string | undefined;
75
51
  }
76
52
 
77
- interface QueryTranslation {
78
- translation: Translation;
53
+ interface Context {
54
+ interval: { start: number; end: number };
55
+ message: string;
56
+ }
57
+
58
+ interface ContextualCompletion {
59
+ pretty: string;
60
+ completion: Completion;
79
61
  }
80
62
 
81
63
  interface ContextualResult {
@@ -85,67 +67,95 @@ interface ContextualResult {
85
67
  translation: QueryTranslation | undefined;
86
68
  }
87
69
 
88
- interface ContextualCompletion {
70
+ interface ContextualTranslationError {
71
+ error: TranslationError;
89
72
  pretty: string;
90
- completion: Completion;
91
73
  }
92
74
 
93
- interface Completion {
94
- at: { start: number; end: number };
95
- filter: boolean | undefined;
96
- items: CompletionItem[];
75
+ interface ContextualTranslationErrors {
76
+ hamelin: string;
77
+ errors: ContextualTranslationError[];
97
78
  }
98
79
 
99
- type CompletionItemKind = "Text" | "Variable" | "Function" | "Command" | "Keyword";
80
+ interface FunctionDescription {
81
+ name: string;
82
+ parameters: string;
83
+ }
84
+
85
+ interface QueryTranslation {
86
+ translation: Translation;
87
+ }
100
88
 
101
89
  interface Translation {
102
90
  sql: string;
103
91
  columns: Column[];
104
92
  }
105
93
 
106
- type HamelinType = "binary" | "boolean" | "interval" | "calendar_interval" | "int" | "double" | "rows" | "string" | "timestamp" | "unknown" | { decimal: { precision: number; scale: number } } | { array: { element_type: HamelinType } } | { map: { key_type: HamelinType; value_type: HamelinType } } | { tuple: { elements: HamelinType[] } } | "variant" | { range: { of: HamelinType } } | { struct: Column[] };
107
-
108
- interface Column {
109
- name: string;
110
- type: HamelinType;
94
+ interface TranslationError {
95
+ area: LanguageArea | undefined;
96
+ stage: Stage;
97
+ level: Level;
98
+ primary: Context;
99
+ supporting: Context[] | undefined;
100
+ source_desc: string | undefined;
111
101
  }
112
102
 
103
+ type BuildCatalogError = { kind: "catalogInit"; message: string } | { kind: "compilation"; name: string; errors: ContextualTranslationErrors } | { kind: "datasetParse"; name: string; message: string };
104
+
113
105
  type Catalog = Record<string, Column[]>;
114
106
 
115
- interface FunctionDescription {
116
- name: string;
117
- parameters: string;
118
- }
107
+ type CompileQueryResult = { Ok: QueryTranslation } | { Err: ContextualTranslationErrors };
108
+
109
+ type CompletionItemKind = "Text" | "Variable" | "Function" | "Command" | "Keyword";
110
+
111
+ type HamelinType = "binary" | "boolean" | "interval" | "calendar_interval" | "int" | "double" | "rows" | "string" | "timestamp" | "unknown" | { decimal: { precision: number; scale: number } } | { array: { element_type: HamelinType } } | { map: { key_type: HamelinType; value_type: HamelinType } } | { tuple: { elements: HamelinType[] } } | "variant" | { range: { of: HamelinType } } | { struct: Column[] };
112
+
113
+ type LanguageArea = "FunctionCall" | "Operator" | "Deref" | "IndexAccess" | "Parsing";
114
+
115
+ type Level = "Error" | "Warning" | "Info";
116
+
117
+ type QueryDatasetsResult = { Ok: string[] } | { Err: ContextualTranslationErrors };
118
+
119
+ type Stage = "Translation" | "Parsing" | "SemanticAnalysis";
119
120
 
120
121
 
122
+ /**
123
+ * WASM-facing wrapper around `hamelin_lib::CatalogProvider`.
124
+ */
121
125
  declare class CatalogProvider {
122
- private constructor();
123
- free(): void;
124
- [Symbol.dispose](): void;
125
- static try_from_catalog(catalog: Catalog): CatalogProvider;
126
+ private constructor();
127
+ free(): void;
128
+ [Symbol.dispose](): void;
129
+ static try_from_catalog(catalog: Catalog): CatalogProvider;
126
130
  }
127
131
 
132
+ /**
133
+ * WASM-facing wrapper around `hamelin_lib::Compiler`.
134
+ */
128
135
  declare class Compiler {
129
- free(): void;
130
- [Symbol.dispose](): void;
131
- compile_query(query: string): CompileQueryResult;
132
- set_time_range(start?: Date | null, end?: Date | null): void;
133
- compile_query_at(query: string, at?: number | null): ContextualResult;
134
- set_catalog_provider(provider: CatalogProvider): void;
135
- get_statement_datasets(query: string): QueryDatasetsResult;
136
- get_function_descriptions(): FunctionDescription[];
137
- set_time_range_expression(expression: string): ContextualTranslationErrors | undefined;
138
- constructor();
139
- }
140
-
141
- declare const buildCatalog: (startingCatalog: Catalog, resources: Array<CatalogResource>) => Promise<BuildCatalogOutput>;
136
+ free(): void;
137
+ [Symbol.dispose](): void;
138
+ compile_query(query: string): CompileQueryResult;
139
+ compile_query_at(query: string, at?: number | null): ContextualResult;
140
+ get_function_descriptions(): FunctionDescription[];
141
+ get_statement_datasets(query: string): QueryDatasetsResult;
142
+ constructor();
143
+ set_catalog_provider(provider: CatalogProvider): void;
144
+ set_time_range(start?: Date | null, end?: Date | null): void;
145
+ set_time_range_expression(expression: string): ContextualTranslationErrors | undefined;
146
+ }
147
+
148
+ type CatalogResourceInput = Omit<CatalogResource, "isUnion"> & {
149
+ isUnion?: boolean;
150
+ };
151
+ declare const buildCatalog: (startingCatalog: Catalog, resources: Array<CatalogResourceInput>) => Promise<BuildCatalogOutput>;
142
152
 
143
153
  declare const compileHamelin: (catalog: Catalog, hamelinInput: string, timeRange?: string) => Promise<CompileQueryResult>;
144
154
  declare const createCompiler: (catalog: Catalog) => Promise<Compiler>;
145
155
 
146
156
  declare const hamelinGrammar = "grammar Hamelin;\n\nidentifierEOF\n : identifier EOF\n ;\n\nqueryEOF\n : query EOF\n ;\n\ncommandEOF\n : command EOF\n ;\n\nexpressionEOF\n : expression EOF\n ;\n\npipelineEOF\n : pipeline EOF\n ;\n\nsimpleIdentifierEOF\n : simpleIdentifier EOF\n ;\n\nquery\n : (WITH simpleIdentifier ASSIGN pipeline)+ pipeline #withQuery\n | pipeline #standaloneQuery\n | expression #expressionQuery\n ;\n\npipeline\n : command ( PIPE command )* #pipelineAlt\n ;\n\ncommand\n : LET_COMMAND assignment (COMMA assignment)* COMMA? #letCommand\n | WHERE_COMMAND expression #whereCommand\n | SELECT_COMMAND assignmentClause (COMMA assignmentClause)* COMMA? #selectCommand\n | DROP_COMMAND selection (COMMA selection)* COMMA? #dropCommand\n | FROM_COMMAND fromClause (COMMA fromClause)* COMMA? #fromCommand\n | UNION_COMMAND fromClause (COMMA fromClause)* COMMA? #unionCommand\n | LIMIT_COMMAND expression #limitCommand\n | PARSE_COMMAND src=expression? string\n AS? identifier (COMMA identifier)* COMMA? NODROP? #parseCommand\n | WITHIN_COMMAND expression #withinCommand\n | AGG_COMMAND (assignmentClause (COMMA assignmentClause)*)? COMMA?\n (BY groupClause (COMMA groupClause)*)? COMMA?\n (SORT BY? sortExpression (COMMA sortExpression)*)? COMMA? #aggCommand\n | SORT BY? sortExpression (COMMA sortExpression)* COMMA? #sortCommand\n | WINDOW_COMMAND assignmentClause (COMMA assignmentClause)* COMMA?\n (BY groupClause (COMMA groupClause)*)? COMMA?\n (SORT BY? sortExpression (COMMA sortExpression)*)? COMMA?\n (WITHIN_COMMAND within=expression)? #windowCommand\n | APPEND_COMMAND tableReference (DISTINCT_BY selection (COMMA selection)*)? COMMA? #appendCommand\n | (JOIN_COMMAND | LOOKUP_COMMAND) fromClause (ON on=expression)? #joinCommand\n | EXPLODE_COMMAND assignmentClause #explodeCommand\n | (UNNEST_COMMAND | ROWS_COMMAND) expression #unnestCommand\n | NEST_COMMAND identifier #nestCommand\n | MATCH_COMMAND pattern+\n (AGG_COMMAND assignmentClause (COMMA assignmentClause)*)? 
COMMA?\n (BY groupClause (COMMA groupClause)*)? COMMA?\n (SORT BY? sortExpression (COMMA sortExpression)*)? COMMA?\n (WITHIN_COMMAND within=expression)? COMMA? #matchCommand\n ;\n\nassignmentClause: assignment | expression;\ngroupClause: assignmentClause;\nassignment: identifier ASSIGN expression;\nselection: identifier;\nsortExpression: expression (ASC | DESC)?;\ntableAlias: simpleIdentifier ASSIGN tableReference;\nfromClause: tableAlias | tableReference;\n\nexpression\n // Keep this list in precedence order (important!)\n : operator=(MINUS | PLUS) expression #unaryPrefixOperator\n | expression (SECOND_TRUNC | MINUTE_TRUNC | HOUR_TRUNC | DAY_TRUNC | WEEK_TRUNC |\n MONTH_TRUNC | QUARTER_TRUNC | YEAR_TRUNC) #tsTrunc\n | left=expression operator=DOT right=simpleIdentifier #deref\n | value=expression LBRACKET index=expression RBRACKET #indexAccess\n | left=expression operator=(ASTERISK | SLASH | PERCENT) right=expression #binaryOperator\n | left=expression operator=(PLUS | MINUS) right=expression #binaryOperator\n | operator=RANGE expression #unaryPrefixOperator\n | expression operator=RANGE #unaryPostfixOperator\n | left=expression operator=RANGE right=expression #binaryOperator\n | left=expression operator=AS right=hamelintype #cast\n | left=expression\n operator=(EQ | NEQ | LT | LTE | GT | GTE | IS | ISNOT | IN | NOTIN)\n right=expression #binaryOperator\n | operator=NOT expression #unaryPrefixOperator\n | left=expression operator=AND right=expression #binaryOperator\n | left=expression operator=OR right=expression #binaryOperator\n\n // Complex Literals\n | left=expression operator=COLON right=expression #pairLiteral\n | LCURLY simpleIdentifier COLON expression\n (COMMA simpleIdentifier COLON expression)* COMMA? RCURLY #structLiteral\n | LPARENS ((expression COMMA) |\n (expression COMMA expression (COMMA expression)* COMMA?)) RPARENS #tupleLiteral\n | LBRACKET (expression (COMMA expression)* COMMA?)? 
RBRACKET #arrayLiteral\n\n // Function Calls\n | functionName=simpleIdentifier\n LPARENS (namedArgument (COMMA namedArgument)* COMMA?)? RPARENS #functionCall\n | functionName=simpleIdentifier LPARENS\n positionalArgument (COMMA positionalArgument)*\n (COMMA namedArgument)* COMMA? RPARENS #functionCall\n\n // Primitive Literals\n | NULL #nullLiteral\n | number #numericLiteral\n | TRUE #booleanLiteral\n | FALSE #booleanLiteral\n | string #stringLiteral\n | BINARY_LITERAL #binaryLiteral\n | RANGE #unboundRangeLiteral\n | (NANOSECOND_INTERVAL | MICROSECOND_INTERVAL | MILLISECOND_INTERVAL |\n SECOND_INTERVAL | MINUTE_INTERVAL | HOUR_INTERVAL | DAY_INTERVAL |\n WEEK_INTERVAL | MONTH_INTERVAL | QUARTER_INTERVAL | YEAR_INTERVAL) #intervalLiteral\n | ROWS_LITERAL #rowsLiteral\n | (SECOND_TRUNC | MINUTE_TRUNC | HOUR_TRUNC | DAY_TRUNC | WEEK_TRUNC |\n MONTH_TRUNC | QUARTER_TRUNC | YEAR_TRUNC) #tsTruncTimestampLiteral\n\n // Environment references\n | columnReference #columnReferenceAlt\n\n // Keep this at the bottom to use it the mechanism that query authors have to specify precedence.\n | LPARENS expression RPARENS #parenthesizedExpression\n ;\n\nhamelintype\n : simpleIdentifier LPARENS hamelintype (COMMA hamelintype)* COMMA? RPARENS #parameterizedType\n | simpleIdentifier LPARENS INTEGER_VALUE (COMMA INTEGER_VALUE)* COMMA? RPARENS #typeWithArguments\n | LCURLY simpleIdentifier COLON hamelintype\n (COMMA simpleIdentifier COLON hamelintype)* COMMA? RCURLY #structType\n | LPARENS hamelintype (COMMA hamelintype )* COMMA? RPARENS #tupleType\n | simpleIdentifier #simpleType\n ;\n\npattern\n : fromClause quantifier? 
#quantified\n | LPARENS pattern+ RPARENS quantifier #nested\n ;\n\nquantifier\n : ASTERISK #AnyNumber\n | PLUS #AtLeastOne\n | QUESTIONMARK #ZeroOrOne\n | LCURLY INTEGER_VALUE RCURLY #Exactly\n ;\n\ncolumnReference\n : simpleIdentifier\n ;\n\ntableReference\n : identifier\n ;\n\nidentifier\n : simpleIdentifier\n | simpleIdentifier (DOT simpleIdentifier)+\n ;\n\nsimpleIdentifier\n : IDENTIFIER #unquotedIdentifier\n | BACKQUOTED_IDENTIFIER #backQuotedIdentifier\n ;\n\nstring\n : SINGLE_QUOTED_STRING #basicSingleQuotedStringLiteral\n | DOUBLE_QUOTED_STRING #basicDoubleQuotedStringLiteral\n | value=SINGLE_QUOTED_UNICODE_STRING (UESCAPE uesc=SINGLE_QUOTED_STRING)? #unicodeSingleQuotedStringLiteral\n | value=DOUBLE_QUOTED_UNICODE_STRING (UESCAPE uesc=DOUBLE_QUOTED_STRING)? #unicodeDoubleQuotedStringLiteral\n ;\n\nnumber\n : value=DECIMAL_VALUE #decimalLiteral\n | value=DOUBLE_VALUE #scientificLiteral\n | value=INTEGER_VALUE #integerLiteral\n ;\n\npositionalArgument\n : expression\n ;\n\nnamedArgument\n : simpleIdentifier ASSIGN expression\n ;\n\n//\n// COMMANDS\n//\n\nLET_COMMAND: 'LET' | 'let';\nWHERE_COMMAND: 'WHERE' | 'where';\nSELECT_COMMAND: 'SELECT' | 'select';\nDROP_COMMAND: 'DROP' | 'drop';\nFROM_COMMAND: 'FROM' | 'from';\nUNION_COMMAND: 'UNION' | 'union';\nLIMIT_COMMAND: 'LIMIT' | 'limit';\nPARSE_COMMAND: 'PARSE' | 'parse';\nWITHIN_COMMAND: 'WITHIN' | 'within';\nAGG_COMMAND: 'AGG' | 'agg';\nWINDOW_COMMAND: 'WINDOW' | 'window';\nAPPEND_COMMAND: 'APPEND' | 'append';\nJOIN_COMMAND: 'JOIN' | 'join';\nLOOKUP_COMMAND: 'LOOKUP' | 'lookup';\nEXPLODE_COMMAND: 'EXPLODE' | 'explode';\nUNNEST_COMMAND: 'UNNEST' | 'unnest';\nNEST_COMMAND: 'NEST' | 'nest';\nROWS_COMMAND: 'ROWS' | 'rows';\nMATCH_COMMAND: 'MATCH' | 'match';\n\nAS: 'AS' | 'as';\nNODROP: 'NODROP' | 'nodrop';\n\n//\n// Operators\n//\n\nPLUS: '+';\nMINUS: '-';\nASTERISK: '*';\nSLASH: '/';\nPERCENT: '%';\nLCURLY: '{';\nRCURLY: '}';\nCOLON: ':';\nQUESTIONMARK: '?';\n\nEQ: '==';\nNEQ: '!=';\nLT: '<';\nLTE: '<=';\nGT: 
'>';\nGTE: '>=';\nRANGE: '..';\n\nASSIGN: '=';\n\n//\n// Keywords\n//\n\nAND: 'AND' | 'and';\nFALSE: 'FALSE' | 'false';\nIS: 'IS' | 'is';\nNOT: 'NOT' | 'not';\nISNOT: 'IS NOT' | 'is not';\nIN: 'IN' | 'in';\nNOTIN: 'NOT IN' | 'not in';\nNULL: 'NULL' | 'null';\nOR: 'OR' | 'or';\nTRUE: 'TRUE' | 'true';\nUESCAPE: 'UESCAPE' | 'uescape';\nWITH: 'WITH' | 'with';\nBY: 'BY' | 'by';\nSORT: 'SORT' | 'sort';\nASC: 'ASC' | 'asc';\nDESC: 'DESC' | 'desc';\nDISTINCT_BY: 'DISTINCT BY' | 'distinct by';\nON: 'ON' | 'on';\nWHEN: 'WHEN' | 'when';\n\n//\n// Symbols\n//\n\nCOMMA: ',';\nPIPE: '|';\nLPARENS: '(';\nRPARENS: ')';\nDOT: '.';\nLBRACKET: '[';\nRBRACKET: ']';\n\n//\n// Literals\n//\n\nSINGLE_QUOTED_STRING\n : '\\'' ( ~'\\'' | '\\'\\'')* '\\''\n ;\nDOUBLE_QUOTED_STRING\n : '\"' ( ~'\"' | '\"\"')* '\"'\n ;\n\nSINGLE_QUOTED_UNICODE_STRING\n : 'U&\\'' ( ~'\\'' | '\\'\\'')* '\\'' // Unicode string with default escape character: U&'Hello winter \\2603 !'\n ;\n\nDOUBLE_QUOTED_UNICODE_STRING\n : 'U&\"' ( ~'\"' | '\"\"')* '\"' // Unicode string with custom escape character: U&'Hello winter #2603 !' UESCAPE '#'\n ;\n\n// Note: we allow any character inside the binary literal and validate\n// its a correct literal when the AST is being constructed. 
This\n// allows us to provide more meaningful error messages to the user\nBINARY_LITERAL\n : 'x\\'' (~'\\'')* '\\''\n ;\n\nNANOSECOND_INTERVAL\n : DECIMAL_INTEGER 'ns'\n ;\n\nMICROSECOND_INTERVAL\n : DECIMAL_INTEGER 'us'\n ;\n\nMILLISECOND_INTERVAL\n : DECIMAL_INTEGER 'ms'\n ;\n\nSECOND_INTERVAL\n : DECIMAL_INTEGER ('s' | 'sec' | 'secs' | 'second' | 'seconds')\n ;\n\nMINUTE_INTERVAL\n : DECIMAL_INTEGER ('m' | 'min' | 'mins' | 'minute' | 'minutes')\n ;\n\nHOUR_INTERVAL\n : DECIMAL_INTEGER ('h' | 'hr' | 'hrs' | 'hour' | 'hours')\n ;\n\nDAY_INTERVAL\n : DECIMAL_INTEGER ('d' | 'day' | 'days')\n ;\n\nWEEK_INTERVAL\n : DECIMAL_INTEGER ('w' | 'week' | 'weeks')\n ;\n\nMONTH_INTERVAL\n : DECIMAL_INTEGER ('mon' | 'month' | 'months')\n ;\n\nQUARTER_INTERVAL\n : DECIMAL_INTEGER ('q' | 'qtr' | 'qtrs' | 'quarter' | 'quarters')\n ;\n\nYEAR_INTERVAL\n : DECIMAL_INTEGER ('y' | 'yr' | 'yrs' | 'year' | 'years')\n ;\n\nROWS_LITERAL\n : DECIMAL_INTEGER ('r' | 'row' | 'rows')\n ;\n\nSECOND_TRUNC\n : '@' ('s' | 'sec' | 'secs' | 'second' | 'seconds')\n ;\n\nMINUTE_TRUNC\n : '@' ('m' | 'min' | 'mins' | 'minute' | 'minutes')\n ;\n\nHOUR_TRUNC\n : '@' ('h' | 'hr' | 'hrs' | 'hour' | 'hours')\n ;\n\nDAY_TRUNC\n : '@' ('d' | 'day' | 'days')\n ;\n\nWEEK_TRUNC\n : '@' ('w' | 'week' | 'weeks')\n ;\n\nMONTH_TRUNC\n : '@' ('mon' | 'month' | 'months')\n ;\n\nQUARTER_TRUNC\n : '@' ('q' | 'qtr' | 'qtrs' | 'quarter' | 'quarters')\n ;\n\nYEAR_TRUNC\n : '@' ('y' | 'yr' | 'yrs' | 'year' | 'years')\n ;\n\nINTEGER_VALUE\n : DECIMAL_INTEGER\n | HEXADECIMAL_INTEGER\n | OCTAL_INTEGER\n | BINARY_INTEGER\n ;\n\nDECIMAL_VALUE\n : DECIMAL_INTEGER '.' DECIMAL_INTEGER\n | '.' DECIMAL_INTEGER\n ;\n\nDOUBLE_VALUE\n : DIGIT+ ('.' DIGIT*)? EXPONENT\n | '.' DIGIT+ EXPONENT\n ;\n\nIDENTIFIER\n : [a-zA-Z_][a-zA-Z_0-9]*\n ;\n\nBACKQUOTED_IDENTIFIER\n : '`' ( ~'`' | '``' )* '`'\n ;\n\nfragment DECIMAL_INTEGER\n : DIGIT ('_'? DIGIT)*\n ;\n\nfragment HEXADECIMAL_INTEGER\n : '0x' ('_'? 
(DIGIT | [A-F]))+\n ;\n\nfragment OCTAL_INTEGER\n : '0o' ('_'? [0-7])+\n ;\n\nfragment BINARY_INTEGER\n : '0b' ('_'? [01])+\n ;\n\nfragment EXPONENT\n : 'e' [+-]? DIGIT+\n ;\n\nfragment DIGIT\n : [0-9]\n ;\n\n//\n// Comments and whitespace\n//\n\nSIMPLE_COMMENT\n : '//' ~[\\r\\n]* '\\r'? '\\n'? -> channel(HIDDEN)\n ;\n\nBRACKETED_COMMENT\n : '/*' .*? '*/' -> channel(HIDDEN)\n ;\n\nWS\n : [ \\r\\n\\t]+ -> skip\n ;\n";
147
157
 
148
- declare const hamelinPrompt = "## Hamelin Instructions\n\nI am going to teach you a new query language, it\u2019s called Hamelin. Its main purpose is to support SIEM use-cases. In some ways, it is similar to SQL, PRQL, Elastic Query Language, Sumo Logic Query Language or Splunk Query Language, though it has some distinct properties. It\u2019s a pipe language, which processes data and allows to separate the operations using pipes. Throughout this guide, I will include some examples in <example></example> tags.\n\nHamelin query consists out of one or more operations separated by pipe (\"|\"). It is very important to memorize the commands, as each pipe operation must start with either of these. Following are the only commands supported:\n\n- `FROM` is the beginning of every Hamelin query. Every query must pull data from _somewhere._ One exception is that Hamelin can be used to calculate some expression value too, e.g. <example>LET x = 8 * 2</example>.\n- `SELECT` allows to specify which columns are selected: <example>SELECT <col1>, <col2>, <col3> = <val></example> drops all columns except columns 1 and 2, and also defines a 3rd column to a new, specific value. 
It\u2019s optional and by default, SELECT * is implied (which selects all columns).\n- `WHERE` filters the rows that flow through it based on applying a boolean condition to each of them.\n- `LET <col> = <val>` defines a new column value, or shadows an existing value.\n- `DROP <col1>, <col2>` removes col1 and col2 from the projection downstream and in the final result set.\n- `WITH <tab1> = <operations>` associates the tab1 with specified operations (until two newlines are encountered), so tab1 can be reused later\n- `AGG <function1, function2, ...> BY <column1, column2, ...>` aggregates and groups by the data accordingly\n- `WINDOW` applies aggregation or window functions over sliding windows of data.\n- `MATCH` correlates multiple event patterns together.\n- `SORT <columns> <optional order>` sorts the rows using specified criteria (and `ASC` or `DESC` order, `ASC` being default)\n- `LIMIT <limit>` limits the output rows\n- `UNNEST <expr>` lifts a struct or array of structs into the parent or enclosing result set.\n- `EXPLODE <expr or assignment_clause>` expands array fields into separate rows.\n\nIt is extremely important to start each pipe section using one of the commands and use pipe (\"|\") to separate further operations, e.g.\n<example>\nFROM example | WHERE name == \"John Smith\" | SELECT name, address\n</example>\n\nFor example, to select columns \"c1\" and \"c2\" from table \"A\", one needs to write:\n\n<example>\nFROM A\n| SELECT c1, c2\n</example>\n\nTo add a filter for \"c1=3\" condition:\n\n<example>\nFROM A\n| WHERE c1 == 3\n| SELECT c1, c2\n</example>\n\nTo select all data from table \"A\" and associate column \"v\" value multiplied by 2 with new column \"x\":\n\n<example>\nFROM A\n| LET x = v*2\n</example>\n\nTo associate column c1 with new column x and remove c1 and c3, this can be used:\n\n<example>\nFROM A\n| LET x = c1\n| DROP c1, c3\n</example>\n\nIf only c1, c2 and c3 are present, the above can be rewritten using just `SELECT` 
:\n<example>\nFROM A\n| SELECT x = c1, c2\n</example>\n\n### Aggregations\n\nAggregations are extremely common in queries and Hamelin supports these via `AGG` keyword: `| AGG <aggregation1>, <aggregation2>, ... BY column1, column2, ...`\n\nThe \"BY ...\" part is optional. Any columns or operations not specified in the aggregation operation will be dropped.\nThe list of aggregations is optional as well. When not specifying it, the operation acts in a similar way as the DISTINCT operator in SQL.\n\nTo aggregate rows in table \"A\" using count and group by c1:\n\n<example>\nFROM A\n | AGG count = count() BY c1\n</example>\n\nTo get distinct c1 values in table \"A\" and ignore nulls:\n\n<example>\nFROM A\n | AGG BY c1\n | WHERE c1 IS NOT NULL\n</example>\n\nTo aggregate rows in table \"A\" using count, group by c1 and select 10 most common values:\n\n<example>\nFROM A\n | AGG count = count() BY c1\n | SORT count DESC\n | LIMIT 10\n</example>\n\nTo take max of column c1, avg of column c2 and count, grouped by c3 and order max(c1) and take top 10 results, this can be issued:\n\n<example>\nFROM A\n | AGG count = count(), max = max(c1), avg = avg(c2) BY c3\n | SORT max DESC\n | LIMIT 10\n</example>\n\nIt is also possible to associate the aggregate values with new column names:\n\n<example>\nFROM A\n | AGG count = count(), max_c1 = max(c1), avg_c2 = avg(c2) by c3\n | SORT max_c1 desc\n | LIMIT 10\n</example>\n\nVery important: remember that aggregation selects only columns (and aggregations) specified, everything else is dropped. If you want to filter and aggregate data, consider the right order. E.g. to filter cases where c1 > 500 and then get average value of c2, grouped by c3 from table tbl, sorted by c2, this is the correct order (WHERE before AGG):\n\n<example>\nFROM tbl\n| WHERE c1 > 500\n| AGG avg_c2 = avg(c2) BY c3\n| SORT avg_c2\n</example>\n\nOf course, there are also cases when WHERE is applied after aggregation. E.g. 
to get average of c1 and count, grouped by c3, only where count() > 10, this will be a correct query:\n\n<example>\nFROM tbl\n | AGG count = count(), avg_c1 = avg(c1) BY c3\n | WHERE count > 10\n</example>\n\n### Literals\n\nHamelin adopts most of SQL\u2019s conventions for the basic literal syntax and semantics. Integer, decimal, boolean, string literals, and NULL all behave as they do in SQL, and have equivalent syntax.\n\nIn addition, however, Hamelin adds:\n\n- Double-quoted strings, which allow for the use of single quotes in the string without needing escaping.\n- Support for multiline. (You can add literal newlines into a string.)\n\n### Identifiers\n\nThe identifier syntax is similar to how Trino SQL handles identifiers. They are ASCII letter sequences. These sequences can also start with an underscore `_` or contain an underscore. The sequences can also contain digits, but they cannot start with digits. For Hamelin, we want to follow along with the way string _literals_ are using double-quotes in many existing programming languages and therefore we want to stray from the SQL example, which requires the use of single-quotes `'` for strings. This means we do not want to support double-quoted identifiers. 
We instead support backtick-quoted identifiers: `identifier of life`.\n\nFollowing query retrieves data from test table and filters for two conditions, whether \"curry wurst\" column is equal to the string \"wiener schnitzel\" and \"1a_extra\" column is equal to the string 'mit \"pommes\"' (note the doublequote inside)\n\n<example>\nFROM _test\n| WHERE `curry wurst` == 'wiener schnitzel'\n AND `1a_extra` == 'mit \"pommes\"'\n</example>\n\n### Reserved Keywords\n\nThe following words are reserved in Hamelin and cannot be used as unquoted identifiers:\n\n- Commands: `LET`, `WHERE`, `SELECT`, `DROP`, `FROM`, `UNION`, `LIMIT`, `PARSE`, `WITHIN`, `AGG`, `WINDOW`, `APPEND`, `JOIN`, `LOOKUP`, `EXPLODE`, `UNNEST`, `NEST`, `ROWS`, `MATCH`\n- Operators/Keywords: `AS`, `AND`, `OR`, `NOT`, `IN`, `IS`, `BY`, `ON`, `SORT`, `ASC`, `DESC`, `WITH`, `TRUE`, `FALSE`, `NULL`\n\nWhen a field name in your data conflicts with a reserved keyword, you must escape it using backticks:\n\n<example>\n// WRONG: fails because `as` is a reserved keyword\n| WHERE source.as.organization == 'Example Corp'\n\n// CORRECT: escape the reserved keyword with backticks\n| WHERE source.`as`.organization == 'Example Corp'\n</example>\n\n### Types\n\nHamelin has the following primitive types:\n\n- `boolean`\n- `int` - an umbrella for all integer types of any bit length\n- `decimal(precision, scale)` - exact fixed point values with specified precision and scale (e.g. `100.5`)\n- `double` - floating point numbers with variable precision (e.g. `-9.87e2`)\n- `string` - a string of any length, defined as a literal with either single or double quotes. String literals can escape quotes of either type by doubling them. (e.g. `\"She said \"\"hello\"\"\"`, `'It''s raining'`)\n- `binary` - translates to `varbinary` in SQL for handling binary data\n- `timestamp` - an umbrella for date, timestamp, and all their variants (precision and timezone)\n- `interval` - an exact time duration of seconds, minutes, hours, or days (e.g. 
`30s`, `14d`)\n- `calendar_interval` - a calendar based time duration of months, quarters, or years (e.g. `3mon`, `5y`)\n- `range` - a span between two values of any primitive type using the `..` operator, which is inclusive on both sides and can be unbounded in either direction (e.g. `ts('2024-01-15')..now()`, `1..10`, `yesterday()..`). This is primarily used for time ranges, but can be used for any sortable type.\n\nHamelin has four composite types: \n\n- A `struct` has a set of fields, each with name and schema. You can access the field `my_field` of `my_struct` by doing `my_struct.my_field`. Struct literals are created with curly braces and the field names and values, e.g. `{ name: 'John Doe', age: 30 }`. You can use `.` notation to assign to nested fields, but only in the root of the assignment.\n <bad_example>LET user = { name: 'John Doe', address.street: '123 Main St'}</bad_example>\n <good_example>LET user = { name: 'John Doe', address: { street: '123 Main St' } }</good_example>\n <good_example>LET user.name = 'John Doe', user.address.street = '123 Main St'</good_example>\n- A `map` has a set of key-value pairs with homogeneous types. You can access the value for key `my_key` in `my_map` by doing `my_map[\"my_key\"]`. Map literals are created with the `map()` function, e.g. `map('name': 'John Doe', 'age': 30)`.\n- An `array` is an ordered list of values with homogeneous types. You can access the value at index 5 in `my_array` by doing `my_array[5]`. Array literals are created with square brackets and the values, e.g. `['John Doe', 'Bob Smith']`.\n- A `variant` is essentially a representation of untyped JSON data. You can access fields of JSON objects with the struct syntax above (e.g. `my_json_obj.my_field`) and you can access items of JSON arrays with the array syntax above (e.g. 
`my_json_array[5]`).\n\nHamelin types can be casted to one another with the `AS` operator\n\n- If the type cast fails, the expression returns `NULL`\n- Casting never happens implicitly, so to concatenate strings with other types you need to cast. Often parentheses are necessary to make sure to cast the right expression (e.g. `\"Today is \" + (now() AS string)`)\n- Casting is most often useful for casting `variant`s parsed from JSON into the expected composite type (e.g. `| LET parsed = parse_json(message) AS { user_id: string, email: string}`)\n- Array types are casted like `array(string)`, not `string[]`\n- Map types are casted like `map(string, int)`, not `{ string: int }`\n\nAny field in Hamelin can also be `NULL`. There is no concept of a `NOT NULL` field. You can check for `NULL` values with `IS NULL` or `IS NOT NULL`.\n\nNOTE: `variant` and `interval` types can never be saved in the output of a table, view, or detection query, so you must convert/cast them to another type in the output of those datasets. For `variant`, you can cast it to a struct, array, etc. For `interval`, you can use a function like `to_nanos()` to convert it to an `int`.\n\n<example>\nLET event.duration = to_nanos(event.end - event.start)\n</example>\n\n### Array Broadcasting\n\nHamelin automatically broadcasts operations across arrays, letting you use familiar syntax without explicit iteration. When you write `array.field`, Hamelin extracts that field from every element. 
When you apply comparisons or function calls, they apply to each element individually.\n\nGiven a `users` array of structs like `[{email: 'alice@example.com'}, {email: 'jdoe@example.com'}]`:\n\n- `users.email` broadcasts field access \u2192 `['alice@example.com', 'jdoe@example.com']`\n- `users.email == 'jdoe@example.com'` broadcasts comparison \u2192 `[false, true]`\n- `upper(users.email)` broadcasts function call \u2192 `['ALICE@EXAMPLE.COM', 'JDOE@EXAMPLE.COM']`\n\nBroadcasting pairs naturally with `any()` and `all()` to check conditions across arrays:\n\n<example>\n// Check if any user has a specific email\n| WHERE any(users.email == 'jdoe@example.com')\n\n// Check if all users are admins\n| WHERE all(users.role == 'admin')\n\n// Get unique values from a field across array elements\n| LET unique_emails = array_distinct(users.email)\n</example>\n\n### Operators\n\nHamelin is designed to be very intentional to make interactive query authoring more fluent and ergonomic.\n\n- Boolean operators\n - Negation: `NOT`\n - Conjunction: `AND`\n - Disjunction: `OR`\n- Identity: `IS` (only really used to test against `NULL`).\n- Arithmetic:\n - Addition: `+`\n - Subtraction: `-`\n - Multiplication: `*`\n - Division: `/`\n - Modulo: `%`\n - Negation: `-`\n- Comparison:\n - Equality: `==`\n - Note we use single-equals for _assignment_.\n - Non-equality: `!=`\n - Greater than: `>`\n - Less than: `<`\n - Greater or equal: `>=`\n - Less or equal: `<=`\n - Inclusion: `IN ['item1', 'item2', ...]` or `IN 1..10`\n- Concatenation: `+`\n - For strings: `'one' + 'two'` is `onetwo`\n - For arrays: `['one'] + ['two']` is `['one', 'two']`\n\n### Unioning multiple tables with FROM\n\nHamelin allows a shorthand in the `FROM` command that targets _many_ sources (by separating them with commas (the `,` character). In a system that offers search, authors often want to perform the same search (or filter) over the rows that come from many different places. 
This is most common in \"needle\" queries, where you\u2019re looking for something relatively rare in a bunch of different data sets. This is not a common pattern in structured querying, likely explaining why SQL makes this so hard, and why there is no direct translation of this form in SQL.\n\nFor example, where table A defines only `f1` and `f2`, and table B defines only `f2` and `f3`, following will work. It will select `f1` and `f2` from table A and `f2` and `f3` from table B, so the full set of fields in the output of the following example will be `f1`, `f2`, `f3`.\n\n<example>\nFROM A, B\n</example>\n\nYou can also alias the fields based on which table they came from if you need to determine which table a given `f2` value came from example. In the following example, the output fields would be a struct `a` with sub-fields `f1` and `f2` and a struct `b` with sub-fields `f2` and `f3`. You can reference the sub-fields later in the query with `a.f1`, `b.f2`, etc. In addition, the fields are all still usable at the top level.\n\n<example>\nFROM a = A, b = B\n</example>\n\nAliasing tables is also useful when aggregating multiple types of rows (often from `WITH` CTEs):\n\n<example>\nWITH network_connections = FROM simba.sysmon_events\n| WHERE winlog.event_id == '3'\n\nWITH suspicious_file_creation = FROM simba.sysmon_events\n| WHERE winlog.event_id == '11'\n\nFROM network_connections = network_connections, suspicious_file_creation = suspicious_file_creation\n| AGG network_connections_count = count(network_connections), suspicious_file_creation_count = count(suspicious_file_creation)\n BY host.name\n</example>\n\n### Functions\n\nThe following functions are supported in expressions (within `LET`, `SELECT`, `WHERE`, etc.):\n\n- len(x[])\n- sum(x[])\n- avg(x[])\n- min(x[])\n- max(x[])\n- array_distinct(x[])\n- filter_null(x[])\n- slice(x[], start, end) - returns a slice of the array x from start to end (exclusive). 
Supports negative indices to count from the end of the array.\n- flatten(x[]) - flattens a nested array by one level.\n- all(x[])\n- any(x[])\n- len(x[])\n- abs(x)\n- cbrt(x)\n- ceil(x)\n- floor(x)\n- degrees(x)\n- e() - Euler's number\n- exp(x)\n- ln(x)\n- log(b, x)\n- log10(x)\n- log2(x)\n- pi()\n- pow(x,p)\n- power(x,p)\n- radians(x)\n- round(x)\n- round(x,d)\n- sign(x)\n- sqrt(x)\n- truncate(x) - Remove the fractional part of a number\n- width_bucket(x,bound1,bound2,n) - the bucket number for a value in a histogram with equal-width buckets\n- width_bucket(x, bins[]) - the bucket number for a value using explicitly defined bucket boundaries\n- if(condition, then, else) - Both `then` and `else` must have the same type\n- case(when_condition: then_condition, when_condition: then_expression, ...) - Evaluates multiple condition-value pairs in order and returns the value associated with the first condition that evaluates to true. Returns null if no conditions are true. Every `then` must have the same type. There is no `else` in this expression, but you can achieve the same results by having `true` in the final `then_expression` like `case(when_condition: then_condition, when_condition: then_expression, ..., true: else_expression)`.\n- regexp_count(str, pattern)\n- regexp_extract(str, pattern)\n- regexp_extract(str, pattern, group)\n- regexp_extract_all(str, pattern)\n- regexp_extract_all(str, pattern, group)\n- regexp_like(string, pattern)\n- regexp_position(string, pattern, start)\n- regexp_replace(string, pattern)\n- regexp_replace(string, pattern, replacement)\n- regexp_split(string, pattern)\n- split(str, separator)\n- array_join(x[], separator)\n- replace(str, str_to_remove)\n- starts_with(str, prefix)\n- ends_with(str, suffix)\n- contains(str, substring)\n- len(str)\n- lower(str)\n- upper(str)\n- slice(str, start, end) - returns a substring of the str from start to end (exclusive). 
Supports negative indices to count from the end of the string.\n- parse_json(json_str) - Only takes a string, never pass a map or struct\n- to_json_string(json_variant) - Only takes a variant. To pass other types, cast to variant with `AS variant`\n- typeof(x)\n- coalesce(x[]) - returns the first non-null value from a list of expressions. All arguments must be type-compatible (e.g. all strings, all ints). To coalesce a variant with a default string, cast the variant first: `coalesce(my_variant AS string, '')`\n- first(x)\n- last(x)\n- now()\n- today()\n- yesterday()\n- tomorrow()\n- ts(str)\n- at_timezone(timestamp, timezone)\n- year(timestamp)\n- month(timestamp)\n- day(timestamp)\n- hour(timestamp)\n- minute(timestamp)\n- second(timestamp)\n- from_unixtime_micros(int) - returns timestamp\n- from_unixtime_millis(int) - returns timestamp\n- from_unixtime_nanos(int) - returns timestamp\n- from_unixtime_seconds(int) - returns timestamp\n- from_millis(x) - returns interval\n- from_nanos(x) - returns interval\n- to_unixtime(timestamp) - returns double in seconds since the Unix epoch\n- to_millis(interval)\n- to_nanos(interval)\n- map(keys, values)\n- map(elements)\n- map_keys(map)\n- map_values(map)\n- next(expression)\n- cidr_contains(cidr, ip)\n- is_ipv4(ip)\n- is_ipv6(ip)\n\nThe following functions are ONLY available within an AGG or WINDOW command (not within LET, SELECT, or WHERE commands):\n\n- all(bool_expr) - returns true if bool_expr is true for all rows\n- any(bool_expr) - returns true if bool_expr is true for any row\n- any_value(x) - returns an arbitrary value from each group\n- approx_percentile(x, percentile)\n- array_agg(x)\n- avg(x)\n- count_distinct(x) - counts the number of distinct non-null values of x\n- approx_distinct(x) - same as count_distinct(x) but uses an approximation algorithm that is faster but less accurate\n- count_if(condition) - counts all rows where condition is true\n- count(expr) - counts all rows where expr is non-null\n- count() - 
counts all rows\n- map_agg(key, value)\n- max(x)\n- min(x)\n- multimap_agg(key, value)\n- stddev(x)\n- sum(x)\n\nThe following functions are ONLY available within a WINDOW command (not within LET, SELECT, WHERE, or AGG commands):\n\n- first_value(expression)\n- last_value(expression)\n- nth_value(expression, n)\n- cume_dist() - the number of rows with values less than or equal to the current row's value, divided by the total number of rows in the partition\n- percent_rank() - the percentile rank of each row within the window partition\n- dense_rank() - the rank of each row within a window partition without gaps\n- lag(expression, offset, ignore_nulls) - the value of an expression from a previous row within the window\n- lead(expression, offset, ignore_nulls) - the value of an expression from a subsequent row within the window\n- rank() - the rank of each row within a window partition with gaps\n- row_number() - a sequential row number for each row within a window partition\n\n### Describing time ranges\n\nUse `<low>..<high>` to describe a time range. Drop either `low` or `high` from the expression to communicate that side being unbounded. Allow the use of interval values, like `WITHIN 1hr..`. This time range syntax is used in the `timeRange` input of tools such as `execute-hamelin-query` and `query-page-run-query` to filter the result set to only the rows that have a `timestamp` in that time range. The time range syntax can be used in various parts of a Hamelin query:\n\n- `| WHERE val IN low..high` allows `IN` to be used as a binary boolean operator to test any sortable value against a range. This can be used anywhere boolean expressions are needed.\n- `| WINDOW WITHIN low..high` defines the frame clause of a window expression, filtering only those rows which are within the interval defined in terms of the current (or output) row.\n\nWhenever providing timestamp literals, you must use a format of `('<TIMESTAMP>' AS timestamp)`. 
For example, to describe noon on March 1st, 2020, use `('2020-03-01 12:00' AS timestamp)`. To select all rows from table A with timestamp greater than March 1st, 2020 at noon, you can use:\n<example>\nFROM A\n| WHERE timestamp > ('2020-03-01 12:00' AS timestamp)\n</example>\n\nTo select rows from the last hour, use `timeRange`:\n\n<example>\n-1h..\n</example>\n\nTo select rows which were between two and one hour ago, use the `timeRange`:\n\n<example>\n-2h..-1h\n</example>\n\nTo select rows which occurred during the past 7 days, use the `timeRange`:\n\n<example>\n-1w..\n</example>\n\nThe `@` operator can also be used on a timestamp to round timestamps down to the nearest unit (day, hour, etc.).\n\nTo select rows in the previous 5 calendar days (not including today), use the `timeRange`:\n\n<example>\n-5d@d..now()@d\n</example>\n\n### Windows\n\n- `WINDOW` is a command that behaves like a mix of `AGG` and `LET`. It has three optional arguments that come at the end, all of which define a window:\n - `BY <column1, column2>` means everything in the let is partitioned by this. (e.g., `BY user` means compute the function in a user-relative window).\n - `SORT <column1, column2, ...>` means everything in the let is sorted by this. (e.g., `SORT timestamp`).\n - `WITHIN <range>` means everything in the let uses this range as the frame. (e.g., `WITHIN -5m` means within a 5 minute sliding window).\n - Here, within uses nearly the same semantics as the `WITHIN` command.\n - We introduce a new row interval literal with the suffix `r` or `rows` or `row` for defining ranges over exact numbers of rows rather than over ranges of values.\n- All three are optional, but the presence of any means _every_ expression in the let is defined \"over\" this window.\n\nFor example, to do count in 5 minute windows in table tbl:\n\n<example>\nFROM tbl | WINDOW c = count() WITHIN -5m\n</example>\n\n#### Rate-Based Detection with WINDOW...WITHIN\n\nUse `WINDOW ... 
WITHIN` for sliding window aggregations to detect bursts or spikes of activity.\n\n**When to use:** Count/aggregate events within a moving time window (e.g., \"10+ failed logins in the last 5 minutes\")\n\n**Key characteristics:**\n- **Purpose**: Detect bursts/spikes of activity (rate-based detection)\n- **Syntax**: `WINDOW ... BY grouping_keys WITHIN -duration`\n- **Window moves with time**: Always looks at the most recent N minutes/hours\n- **Use cases**: Rate limiting, burst detection, threshold alerting, brute force attacks\n- **Duration format**: `-5min`, `-1h`, `-1d` (negative values look backward in time)\n\n**Example: Brute Force Detection**\n\n<example>\nFROM simba.okta_events\n| WHERE event.action == \"user.session.start\" AND event.outcome == \"failure\"\n| WINDOW\n failed_login_count = count(),\n unique_ips = array_agg(source.ip)\n BY user.name\n WITHIN -5min\n| WHERE failed_login_count >= 10\n| SELECT\n timestamp,\n user.name,\n event.count = failed_login_count,\n source.ip_count = unique_ips\n</example>\n\n**Common WINDOW patterns:**\n\n<example>\n// Count events in sliding window\nFROM dataset | WINDOW event_count = count() BY host WITHIN -10min\n</example>\n\n<example>\n// Conditional counting\nFROM dataset\n| WINDOW\n malicious_count = count_if(is_malicious),\n total_count = count()\n BY user.name\n WITHIN -30min\n</example>\n\n### Event Correlation with MATCH\n\nThe `MATCH` command enables powerful multi-stage detection by correlating different event patterns. 
This is essential for detecting attack chains and reducing false positives.\n\n**When to use MATCH:**\n- Detecting multi-stage attacks (initial access \u2192 execution \u2192 persistence)\n- Correlating different behavioral indicators\n- Building high-fidelity detections that require multiple suspicious events\n- Creating attack chain detections with context\n\n**MATCH Pattern Structure:**\n\n```hamelin\n// Step 1: Define individual event patterns as WITH clauses\nWITH pattern1 = FROM dataset\n| WHERE <specific conditions>\n| SELECT fields...\n\nWITH pattern2 = FROM dataset\n| WHERE <different conditions>\n| SELECT fields...\n\n// Step 2: Correlate patterns using MATCH (space-separated!)\nMATCH p1=pattern1? p2=pattern2?\nBY correlation_key // e.g., host, user, session_id\n| LET indicator_count = (\n if(p1 IS NOT NULL, 1, 0) +\n if(p2 IS NOT NULL, 1, 0)\n) AS int\n| WHERE indicator_count >= 2 // Alert when 2+ patterns match\n\n// Step 3: Aggregate and enrich\n| AGG\n timestamp = min(timestamp),\n event.start = min(timestamp),\n event.end = max(timestamp),\n pattern1_event = array_agg(p1)[0],\n pattern2_event = array_agg(p2)[0],\n indicator_count = max(indicator_count)\n BY match_number, correlation_key\n\n// Step 4: Apply time window constraint\n| WHERE event.end - event.start <= 10min\n\n// Step 5: Build output with risk scoring\n| LET risk_score = (\n if(pattern1_event IS NOT NULL, 30, 0) +\n if(pattern2_event IS NOT NULL, 30, 0)\n) AS int\n| SELECT timestamp, fields...\n```\n\n**Critical MATCH Syntax Rules:**\n- Pattern aliases are **space-separated**, NOT comma-separated: `MATCH a=query1? b=query2? 
c=query3?`\n- Use `?` quantifier to make patterns optional (allows partial matches)\n- The `BY` clause specifies the correlation key (what ties events together)\n- Access matched events in AGG using `array_agg(alias)[0]`\n- Time window filtering: `WHERE event.end - event.start <= duration`\n\n**Example: Multi-Stage Malware Detection**\n\n<example>\n// Define behavioral indicators\nWITH lnk_powershell = FROM simba.sysmon_events\n| WHERE event.code == \"1\"\n| LET parent_image = coalesce(winlog.event_data[\"ParentImage\"], '') AS string\n| LET image = coalesce(winlog.event_data[\"Image\"], '') AS string\n| LET cmd_line = coalesce(winlog.event_data[\"CommandLine\"], '') AS string\n| LET host = host.name AS string\n| WHERE regexp_like(lower(parent_image), '(?i).*\\\\\\\\explorer\\\\.exe')\n AND regexp_like(lower(image), '(?i).*(powershell|pwsh)\\\\.exe')\n AND regexp_like(cmd_line, '(?i).*(hidden|-enc|-encodedcommand).*')\n| SELECT timestamp, host, process_image = image, process_commandline = cmd_line\n\nWITH cloud_download = FROM simba.sysmon_events\n| WHERE event.code == \"1\"\n| LET image = coalesce(winlog.event_data[\"Image\"], '') AS string\n| LET cmd_line = coalesce(winlog.event_data[\"CommandLine\"], '') AS string\n| LET host = host.name AS string\n| WHERE regexp_like(lower(image), '(?i).*(powershell|pwsh)\\\\.exe')\n AND regexp_like(cmd_line, '(?i).*(github\\\\.com|dropbox\\\\.com).*')\n AND regexp_like(cmd_line, '(?i).*(downloadstring|invoke-webrequest).*')\n| SELECT timestamp, host, process_image = image, process_commandline = cmd_line\n\n// Correlate: alert when 2+ indicators on same host within 10 min\nMATCH lnk=lnk_powershell? 
dl=cloud_download?\nBY host\n| LET indicator_count = (\n if(lnk IS NOT NULL, 1, 0) +\n if(dl IS NOT NULL, 1, 0)\n) AS int\n| WHERE indicator_count >= 2\n| AGG\n timestamp = min(timestamp),\n event.start = min(timestamp),\n event.end = max(timestamp),\n lnk_event = array_agg(lnk)[0],\n dl_event = array_agg(dl)[0],\n indicator_count = max(indicator_count)\n BY match_number, host\n| WHERE event.end - event.start <= 10min\n| LET risk_score = (\n if(lnk_event IS NOT NULL, 40, 0) +\n if(dl_event IS NOT NULL, 40, 0)\n) AS int\n| SELECT\n timestamp,\n event.start,\n event.end,\n host,\n event.count = indicator_count,\n event.risk_score = risk_score\n</example>\n\n### UNNEST and EXPLODE\n\nThe UNNEST command lifts struct or array of struct fields into the parent or enclosing result set. When given a struct, it lifts struct fields into the parent struct without changing row cardinality. When given an array of struct, it performs an explode operation followed by unnesting, creating one row per array element with the struct fields lifted into the parent struct.\n\nThe EXPLODE command transforms rows containing array fields into multiple rows, with each element of the array becoming a separate row. Each array element becomes a new row with all other fields from the original row preserved in each generated output row. When you use assignment syntax (`identifier = expression`), the exploded values are placed in the specified field name. 
Without assignment syntax, the exploded values replace the original array field.\n\n### More valid examples\n\nTo find records in table foo where column c1 is greater than 42, one needs to write:\n\n<example>\nFROM foo | WHERE c1 > 42\n</example>\n\nTo take unique (distinct) tuples of hostname and dest_port in table1, you can use AGG without aggregation functions:\n<example>\nFROM table1 | AGG BY hostname, dest_port\n</example>\n\nTo get number of flows by protocol and destination_port, then select top 10 by count, one needs to write:\n<example>\nFROM flows\n| AGG count = count() BY protocol, destination_port\n| SORT count DESC\n| LIMIT 10\n</example>\n\nTo get number of flows by day, one needs to write:\n<example>\nFROM flows\n| AGG count = count() BY timestamp@d\n| SORT count DESC\n| LIMIT 10\n</example>\n\nThe resulting data will have two columns: count and timestamp (note that the \"@d\" part gets removed in the outputted column name).\n\n### Invalid usage examples\n\nHere are some examples how NOT TO USE Hamelin. 
It is important to memorize these bad examples and verify when producing a response to the prompt if the output is correct.\n\nFollowing bad query mistakenly uses ORDER BY rather than SORT:\n<bad_example>\nFROM flows\n| ORDER BY count() DESC\n</bad_example>\nAfter fixing:\n<example>\nFROM flows\n| SORT count() DESC\n</example>\n\nFollowing bad query uses aggregation for sorting, but it was already done\n<bad_example>\nFROM flows\n| AGG count() BY user_name\n| SORT count()\n</bad_example>\nAfter fixing:\n<example>\nFROM flows\n| AGG count = count() BY user_name\n| SORT count\n</example>\n\nFollowing bad query mistakenly uses a single equal sign for equality test:\n<bad_example>\nFROM flows\n| WHERE user = 'John Smith'\n</bad_example>\nAfter fixing:\n<example>\nFROM flows\n| WHERE user == 'John Smith'\n</example>\n\nFollowing bad query mistakenly uses a star character in count:\n<bad_example>\nFROM flows\n| AGG count = count(*) BY user\n</bad_example>\nAfter fixing:\n<example>\nFROM flows\n| AGG count = count() BY user\n</example>\n\nFollowing bad query is mixing two commands in one:\n<bad_example>\nFROM flows | SELECT dest_port, LET proto = 'tcp'\n</bad_example>\nAfter fixing:\n<example>\nFROM flows | SELECT dest_port | LET proto = 'tcp'\n</example>\n\nFollowing bad query is using incorrect AS operator for aliasing in AGG:\n<bad_example>\nFROM flows | AGG max(timestamp) AS max_timestamp\n</bad_example>\nAfter fixing:\n<example>\nFROM flows | AGG max_timestamp = max(timestamp)\n</example>\n\nFollowing bad query uses AS for aliasing in SELECT (AS is ONLY for type casting, use `=` for aliasing):\n<bad_example>\nFROM flows | SELECT source_ip AS src, destination_ip AS dst\n</bad_example>\nAfter fixing:\n<example>\nFROM flows | SELECT src = source_ip, dst = destination_ip\n</example>\n\nThe following bad query uses the LIKE operator, which doesn't exist in Hamelin:\n<bad_example>\nFROM flows | WHERE proto LIKE 'http%'\n</bad_example>\nThree different options to 
fix:\n<example>\nFROM flows | WHERE starts_with(proto, 'http')\n</example>\n<example>\nFROM flows | WHERE contains(proto, 'http')\n</example>\n<example>\nFROM flows | WHERE regexp_like(proto, '^http')\n</example>\n\nAGG does not support the WITHIN clause. Use WINDOW for rate-based detection with sliding windows.\n<bad_example>\n| AGG count() BY host WITHIN -5min\n</bad_example>\nAfter fixing:\n<example>\n| WINDOW count() BY host WITHIN -5min\n</example>\n\nMATCH patterns must be space-separated, not comma-separated.\n<bad_example>\nMATCH a=query1?, b=query2?, c=query3?\n</bad_example>\nAfter fixing:\n<example>\nMATCH a=query1? b=query2? c=query3?\n</example>\n\n### Key differences from SQL\n\nHamelin is not SQL but a unique query language! Carefully note the Hamelin rules when drafting a new query.\n\n- The count function does not include a star. So it's `count()` and definitely NOT `count(*)`.\n- There is no `CASE`/`WHEN` operator in Hamelin, but its implemented as the `case(when_condition: then_condition, when_condition: then_expression, ...)`\n- There's no `LIKE` operator. Instead, use `contains` to do simple substring searching or `regexp_like` to check if a string matches a regex pattern.\n- String concatenation happens with `+` not `||`\n- Double equals `==` is required for equality comparison. Single equals `=` is only used for assignment.\n- `AS` is used for casting, never for assignment/aliasing. NEVER use it like `SELECT expr AS alias`. Only use it for casting like `SELECT expr AS type`. To do aliasing in a SELECT command, use `SELECT alias = expr` syntax.\n- `SORT`, not `ORDER BY`\n- There is no support for subqueries within expressions (`| WHERE name IN (FROM admins | SELECT name)` is not valid)\n- Trailing commas are allowed (but not required).\n\n### Summary\n\nLet me note a few important properties:\n\n- Aggregation, when used, must be done using `AGG` keyword. 
The `AGG` command completely redefines the field space so only fields used/defined in the command will be available downstream. This means that if a field isn't used at all in the AGG command, you cannot reference it again in a subsequent command.\n- Grouping, when used, must be done within a window or aggregation, i.e. there is no separate GROUP BY operation, but there is \"AGG .... BY ....\"\n- Each query must start with `FROM <table>`, followed by other operations separated by pipe character (\"|\").\n- Very important, pipe character (\"|\") must be always used to separate each operation. For example, this is invalid: <bad_example>FROM tbl WHERE x > 5</bad_example>. Instead, it should be: <example> FROM tbl | WHERE x > 5</example>. It\u2019s extremely important, use pipe characters for separation!\n- The only valid operation names are FROM, SELECT, WHERE, DROP, LET, WITH, WINDOW, MATCH, SORT, LIMIT, AGG, UNNEST, EXPLODE - each operation must start with one of these.\n- When the prompt contains a SQL query on the input, you must convert it into a Hamelin query!!!\n\n### Best Practices\n\n- Don't use `SORT` and `LIMIT` commands unless the specific situation requires them or the user asks for it specifically. If you're adding `| SORT timestamp DESC` to the end of the query, that's almost always wrong.\n- `SELECT` is useful to narrow the result set if the you and the user know exactly what you're looking for, but `SELECT` is not required and should be omitted for most queries.\n\n### Final Reminders\n\nNow you should understand how Hamelin operates and how to write queries. When providing Hamelin queries, make sure you take into account syntax requirements of Hamelin and do logical operators correctly. Ensure that the order of operations as logically described in the question is followed. Ensure that the order of aggregating and filtering operations is correct. When asked for providing a Hamelin query, provide concise output with a correct Hamelin query.\n";
158
+ declare const hamelinPrompt = "## Hamelin Instructions\n\nI am going to teach you a new query language, it\u2019s called Hamelin. Its main purpose is to support SIEM use-cases. In some ways, it is similar to SQL, PRQL, Elastic Query Language, Sumo Logic Query Language or Splunk Query Language, though it has some distinct properties. It\u2019s a pipe language, which processes data and allows to separate the operations using pipes. Throughout this guide, I will include some examples in <example></example> tags.\n\nHamelin query consists out of one or more operations separated by pipe (\"|\"). It is very important to memorize the commands, as each pipe operation must start with either of these. Following are the only commands supported:\n\n- `FROM` is the beginning of every Hamelin query. Every query must pull data from _somewhere._ One exception is that Hamelin can be used to calculate some expression value too, e.g. <example>LET x = 8 * 2</example>.\n- `SELECT` allows to specify which columns are selected: <example>SELECT <col1>, <col2>, <col3> = <val></example> drops all columns except columns 1 and 2, and also defines a 3rd column to a new, specific value. 
It\u2019s optional and by default, SELECT * is implied (which selects all columns).\n- `WHERE` filters the rows that flow through it based on applying a boolean condition to each of them.\n- `LET <col> = <val>` defines a new column value, or shadows an existing value.\n- `DROP <col1>, <col2>` removes col1 and col2 from the projection downstream and in the final result set.\n- `WITH <tab1> = <operations>` associates the tab1 with specified operations (until two newlines are encountered) , so tab1 can be reused later\n- `AGG <function1, function2, ...> BY <column1, column2, ...>` aggregates and groups by the data accordingly\n- `WINDOW` applies aggregation or window functions over sliding windows of data.\n- `MATCH` correlates multiple event patterns together.\n- `SORT <columns> <optional order>` sorts the rows using specified criteria (and `ASC` or `DESC` order, `ASC` being default)\n- `LIMIT <limit>` limits the output rows\n- `UNNEST <expr>` lifts a struct or array of structs into the parent or enclosing result set.\n- `EXPLODE <expr or assignment_clause>` expand array fields into separate rows.\n\nIt is extremely important to start each pipe section using one of the commands and use pipe (\"|\") to separate further operations, e.g.\n<example>\nFROM example | WHERE name = \"John Smith\" | SELECT name, address\n</example>\n\nFor example, to select columns \"c1\" and \"c2\" from table \"A\", one needs to write:\n\n<example>\nFROM A\n| SELECT c1, c2\n</example>\n\nTo add a filter for \"c1=3\" condition:\n\n<example>\nFROM A\n| WHERE c1 == 3\n| SELECT c1, c2\n</example>\n\nTo select all data from table \"A\" and associate column \"v\" value multipled by 2 with new column \"x\"\"\n\n<example>\nFROM A\n| LET x = v*2\n</example>\n\nTo associate column c1 with new column x and remove c1 and c3, this can be used:\n\n<example>\nFROM A\n| LET x = c1\n| DROP c1, c3\n</example>\n\nIf only c1, c2 and c3 are present, the above can be rewritten using just `SELECT` 
:\n<example>\nFROM A\n| SELECT x = c1, c2\n</example>\n\n### Aggregations\n\nAggregations are extremely common in queries and Hamelin supports these via `AGG` keyword: `| AGG <aggregation1>, <aggregation2>, ... BY column1, column2, ...`\n\nThe \"BY ...\" part is optional. Any columns or operations not specified in the aggregation operation will be dropped.\nThe list of aggregations is optional as well. When not specifying it, the operation acts in a similar way as the DISTINCT operator in SQL.\n\nTo aggregate rows in table \"A\" using count and group by c1:\n\n<example>\nFROM A\n | AGG count = count() BY c1\n</example>\n\nTo get distinct c1 values in table \"A\" and ignore nulls:\n\n<example>\nFROM A\n | AGG BY c1\n | WHERE c1 IS NOT NULL\n</example>\n\nTo aggregate rows in table \"A\" using count, group by c1 and select 10 most common values:\n\n<example>\nFROM A\n | AGG count = count() BY c1\n | SORT count DESC\n | LIMIT 10\n</example>\n\nTo take max of column c1, avg of column c2 and count, grouped by c3 and order max(c1) and take top 10 results, this can be issued:\n\n<example>\nFROM A\n | AGG count = count(), max = max(c1), avg = avg(c2) BY c3\n | SORT max DESC\n | LIMIT 10\n</example>\n\nIt is also possible to associate the aggregate values with new column names:\n\n<example>\nFROM A\n | AGG count = count(), max_c1 = max(c1), avg_c2 = avg(c2) by c3\n | SORT max_c1 desc\n | LIMIT 10\n</example>\n\nVery important: remember that aggregation selects only columns (and aggregations) specified, everything else is dropped. If you want to filter and aggregate data, consider the right order. E.g. to filter cases where c1 > 500 and then get average value of c2, grouped by c3 from table tbl, sorted by c2, this is the correct order (WHERE before AGG):\n\n<example>\nFROM tbl\n| WHERE c1 > 500\n| AGG avg_c2 = avg(c2) BY c3\n| SORT avg_c2\n</example>\n\nOf course, there are also cases when WHERE is applied after aggregation. E.g. 
to get average of c1 and count, grouped by c3, only where count() > 10, this will be a correct query:\n\n<example>\nFROM tbl\n | AGG count = count(), avg_c1 = avg(c1) BY c3\n | WHERE count > 10\n</example>\n\n### Literals\n\nHamelin adopts most of SQL\u2019s conventions for the basic literal syntax and semantics. Integer, decimal, boolean, string literals, and NULL all behave as they do in SQL, and have equivalent syntax.\n\nIn addition, however, Hamelin adds:\n\n- Double-quoted strings, which allow for the use of single quotes in the string without needing escaping.\n- Support for multiline. (You can add literal newlines into a string.)\n\n### Identifiers\n\nThe identifier syntax is similar to how Trino SQL handles identifiers. They are ASCII letter sequences. These sequences can also start with an underscore `_` or contain an underscore. The sequences can also contain digits, but they cannot start with digits. For Hamelin, we want to follow along with the way string _literals_ are using double-quotes in many existing programming languages and therefore we want to stray from the SQL example, which requires the use of single-quotes `'` for strings. This means we do not want to support double-quoted identifiers. 
We instead support backtick-quoted identifiers: `identifier of life`.\n\nFollowing query retrieves data from test table and filters for two conditions, whether \"curry wurst\" column is equal to the string \"wiener schnitzel\" and \"1a_extra\" column is equal to the string 'mit \"pommes\"' (note the doublequote inside)\n\n<example>\nFROM _test\n| WHERE `curry wurst` == 'wiener schnitzel'\n AND `1a_extra` == 'mit \"pommes\"'\n</example>\n\n### Reserved Keywords\n\nThe following words are reserved in Hamelin and cannot be used as unquoted identifiers:\n\n- Commands: `LET`, `WHERE`, `SELECT`, `DROP`, `FROM`, `UNION`, `LIMIT`, `PARSE`, `WITHIN`, `AGG`, `WINDOW`, `APPEND`, `JOIN`, `LOOKUP`, `EXPLODE`, `UNNEST`, `NEST`, `ROWS`, `MATCH`\n- Operators/Keywords: `AS`, `AND`, `OR`, `NOT`, `IN`, `IS`, `BY`, `ON`, `SORT`, `ASC`, `DESC`, `WITH`, `TRUE`, `FALSE`, `NULL`\n\nWhen a field name in your data conflicts with a reserved keyword, you must escape it using backticks:\n\n<example>\n// WRONG: fails because `as` is a reserved keyword\n| WHERE source.as.organization == 'Example Corp'\n\n// CORRECT: escape the reserved keyword with backticks\n| WHERE source.`as`.organization == 'Example Corp'\n</example>\n\n### Types\n\nHamelin has the following primitive types:\n\n- `boolean`\n- `int` - an umbrella for all integer types of any bit length\n- `decimal(precision, scale)` - exact fixed point values with specified precision and scale (e.g. `100.5`)\n- `double` - floating point numbers with variable precision (e.g. `-9.87e2`)\n- `string` - a string of any length, defined as a literal with either single quotes. String literals can escape quotes of either type by doubling them. (e.g. `\"She said \"\"hello\"\"\"`, `'It''s raining`)\n- `binary` - translates to `varbinary` in SQL for handling binary data\n- `timestamp` - an umbrella for date, timestamp, and all their variants (precision and timezone)\n- `interval` - an exact time duration of seconds, minutes, hours, or days (e.g. 
`30s`, `14d`)\n- `calendar_interval` - a calendar based time duration of months, quarters, or years (e.g. `3mon`, `5y`)\n- `range` - a span between two values of any primitive type using the `..` operator, which is inclusive on both sides and can be unbounded in either direction (e.g. `ts('2024-01-15')..now()`, `1..10`, `yesterday()..`). This is primarily used for time ranges, but can be used for any sortable type.\n\nHamelin has four composite types: \n\n- A `struct` has a set of fields, each with name and schema. You can access its field `my_field` of `my_struct` by doing `my_struct.my_field`. Struct literals are created with curly braces and the field names and values, e.g. `{ name: 'John Doe', age: 30 }`. You can use `.` notation to assign to nested fields, but only in the root of the assignment.\n <bad_example>LET user = { name: 'John Doe', address.street: '123 Main St'}</bad_example>\n <good_example>LET user = { name: 'John Doe', address: { street: '123 Main St' } }</good_example>\n <good_example>LET user.name = 'John Doe', user.address.street = '123 Main St'</good_example>\n- A `map` has a set of key-value pairs with homogeneous types. You can access the value for key `my_key` in `my_map` by doing `my_map[\"my_key\"]`. Map literals are created with the `map()` function, e.g. `map('name': 'John Doe', 'age': 30)`.\n- An `array` is an ordered list of values with homogeneous types. You can access the value at index 5 in `my_array` my doing `my_array[5]`. Array literals are created with square brackets and the values, e.g. `['John Doe', 'Bob Smith']`.\n- A `variant` is essentially a representation of untyped JSON data. You can access fields of JSON objects with the struct syntax above (e.g. `my_json_obj.my_field`) and you can access items of JSON arrays with the array syntax above (e.g. 
`my_json_array[5]`).\n\nHamelin types can be casted to one another with the `AS` operator\n\n- If the type cast fails, the expression returns `NULL`\n- Casting never happens implicitly, so to concatenate strings with other types you need to cast. Often parentheses are necessary to make sure to cast the right expression (e.g. `\"Today is \" + (now() AS string)`)\n- Casting is most often useful for casting `variant`s parsed from JSON into the expected composite type (e.g. `| LET parsed = parse_json(message) AS { user_id: string, email: string}`)\n- Array types are casted like `array(string)`, not `string[]`\n- Map types are casted like `map(string, int)`, not `{ string: int }`\n\nAny field in Hamelin can also be `NULL`. There is no concept of a `NOT NULL` field. You can check for `NULL` values with `IS NULL` or `IS NOT NULL`.\n\nNOTE: `variant` and `interval` types can never be saved in the output of a table, view, or detection query, so you must convert/cast them to another type in the output of those datasets. For `variant`, you can cast it to a struct, array, etc. For `interval`, you can use a function like `to_nanos()` to convert it to an `int`.\n\n<example>\nLET event.duration = to_nanos(event.end - event.start)\n</example>\n\n### Array Broadcasting\n\nHamelin automatically broadcasts operations across arrays, letting you use familiar syntax without explicit iteration. When you write `array.field`, Hamelin extracts that field from every element. 
When you apply comparisons or function calls, they apply to each element individually.\n\nGiven a `users` array of structs like `[{email: 'alice@example.com'}, {email: 'jdoe@example.com'}]`:\n\n- `users.email` broadcasts field access \u2192 `['alice@example.com', 'jdoe@example.com']`\n- `users.email == 'jdoe@example.com'` broadcasts comparison \u2192 `[false, true]`\n- `upper(users.email)` broadcasts function call \u2192 `['ALICE@EXAMPLE.COM', 'JDOE@EXAMPLE.COM']`\n\nBroadcasting pairs naturally with `any()` and `all()` to check conditions across arrays:\n\n<example>\n// Check if any user has a specific email\n| WHERE any(users.email == 'jdoe@example.com')\n\n// Check if all users are admins\n| WHERE all(users.role == 'admin')\n\n// Get unique values from a field across array elements\n| LET unique_emails = array_distinct(users.email)\n</example>\n\n### Operators\n\nHamelin is designed to be very intentional to make interactive query authoring more fluent and ergonomic.\n\n- Boolean operators\n - Negation: `NOT`\n - Conjunction: `AND`\n - Disjunction: `OR`\n- Identity: `IS` (only really used to test against `NULL`).\n- Arithmetic:\n - Addition: `+`\n - Subtraction: `-`\n - Multiplication: `*`\n - Division: `/`\n - Modulo: `%`\n - Negation: `-`\n- Comparison:\n - Equality: `==`\n - Note we use single-equals for _assignment_.\n - Non-equality: `!=`\n - Greater than: `>`\n - Less than: `<`\n - Greater or equal: `>=`\n - Less or equal: `<=`\n - Inclusion: `IN ['item1', 'item2', ...]` or `IN 1..10`\n- Concatenation: `+`\n - For strings: `'one' + 'two'` is `onetwo`\n - For arrays: `['one'] + ['two']` is `['one', 'two']`\n\n### Unioning multiple tables with FROM\n\nHamelin allows a shorthand in the `FROM` command that targets _many_ sources (by separating them with commas (the `,` character). In a system that offers search, authors often want to perform the same search (or filter) over the rows that come from many different places. 
This is most common in \"needle\" queries, where you\u2019re looking for something relatively rare in a bunch of different data sets. This is not a common pattern in structured querying, likely explaining why SQL makes this so hard, and why there is no direct translation of this form in SQL.\n\nFor example, where table A defines only `f1` and `f2`, and table B defines only `f2` and `f3`, following will work. It will select `f1` and `f2` from table A and `f2` and `f3` from table B, so the full set of fields in the output of the following example will be `f1`, `f2`, `f3`.\n\n<example>\nFROM A, B\n</example>\n\nYou can also alias the fields based on which table they came from if you need to determine which table a given `f2` value came from example. In the following example, the output fields would be a struct `a` with sub-fields `f1` and `f2` and a struct `b` with sub-fields `f2` and `f3`. You can reference the sub-fields later in the query with `a.f1`, `b.f2`, etc. In addition, the fields are all still usable at the top level.\n\n<example>\nFROM a = A, b = B\n</example>\n\nAliasing tables is also useful when aggregating multiple types of rows (often from `WITH` CTEs):\n\n<example>\nWITH network_connections = FROM simba.sysmon_events\n| WHERE winlog.event_id == '3'\n\nWITH suspicious_file_creation = FROM simba.sysmon_events\n| WHERE winlog.event_id == '11'\n\nFROM network_connections = network_connections, suspicious_file_creation = suspicious_file_creation\n| AGG network_connections_count = count(network_connections), suspicious_file_creation_count = count(suspicious_file_creation)\n BY host.name\n</example>\n\n### Functions\n\nThe following functions are supported in expressions (within `LET`, `SELECT`, `WHERE`, etc.):\n\n- len(x[])\n- sum(x[])\n- avg(x[])\n- min(x[])\n- max(x[])\n- array_distinct(x[])\n- filter_null(x[])\n- slice(x[], start, end) - returns a slice of the array x from start to end (exclusive). 
Supports negative indices to count from the end of the array.\n- flatten(x[]) - flattens a nested array by one level.\n- all(x[])\n- any(x[])\n- len(x[])\n- abs(x)\n- cbrt(x)\n- ceil(x)\n- floor(x)\n- degrees(x)\n- e() - Euler's number\n- exp(x)\n- ln(x)\n- log(b, x)\n- log10(x)\n- log2(x)\n- pi()\n- pow(x,p)\n- power(x,p)\n- radians(x)\n- round(x)\n- round(x,d)\n- sign(x)\n- sqrt(x)\n- truncate(x) - Remove the fractional part of a number\n- width_bucket(x,bound1,bound2,n) - the bucket number for a value in a histogram with equal-width buckets\n- width_bucket(x, bins[]) - the bucket number for a value using explicitly defined bucket boundaries\n- if(condition, then, else) - Both `then` and `else` must have the same type\n- case(when_condition: then_condition, when_condition: then_expression, ...) - Evaluates multiple condition-value pairs in order and returns the value associated with the first condition that evaluates to true. Returns null if no conditions are true. Every `then` must have the same type. 
There is no `else` in this expression, but you can achieve the same results by having `true` in the final `then_expression` like `case(when_condition: then_condition, when_condition: then_expression, ..., true: else_expression)`.\n- regexp_count(str, pattern)\n- regexp_extract(str, pattern)\n- regexp_extract(str, pattern, group)\n- regexp_extract_all(str, pattern)\n- regexp_extract_all(str, pattern, group)\n- regexp_like(string, pattern)\n- regexp_position(string, pattern, start)\n- regexp_replace(string, pattern)\n- regexp_replace(string, pattern, replacement)\n- regexp_split(string, pattern)\n\n#### Regex patterns & escaping\n\nIn Hamelin, **regex patterns are just string literals** (the `pattern` argument to `regexp_like`, `regexp_extract`, `regexp_replace`, etc).\n\nThis has an important consequence:\n\n- Hamelin **does not** interpret backslash escapes inside normal strings.\n- The **only** escape rule for normal strings is:\n - To put a quote inside a string, **double it**:\n - `'Kyle''s dog'`\n - `\"Kyle said, \"\"Hello!\"\"\"`\n\nSo when writing regex patterns, write the regex you intend to use **directly**, without \u201Cdouble-escaping\u201D backslashes like you would in many programming languages.\n\n<example>\n// \u2705 Digit class: use a single backslash\nLET digits_removed = regexp_replace('a1b', '\\d', '')\n// digits_removed == 'ab'\n</example>\n\n<example>\n// \u274C Common mistake: this matches the literal text \"\\d\", not digits\nLET unchanged = regexp_replace('a1b', '\\\\d', '')\n// unchanged == 'a1b'\n</example>\n\n<example>\n// \u2705 Matching a literal backslash:\n// In regex you need \\\\ to match one backslash.\n// In Hamelin you write two backslashes inside the string.\nLET normalized = regexp_replace('C:\\Windows', '\\\\', '/')\n// normalized == 'C:/Windows'\n</example>\n\n<example>\n// Dot does NOT match newline unless you use the (?s) flag (dotall)\nLET no_match = regexp_replace('a\nb', 'a.*b', 'x')\nLET match = regexp_replace('a\nb', 
'(?s)a.*b', 'x')\n</example>\n\n- split(str, separator)\n- array_join(x[], separator)\n- replace(str, str_to_remove)\n- starts_with(str, prefix)\n- ends_with(str, suffix)\n- contains(str, substring)\n- len(str)\n- lower(str)\n- upper(str)\n- slice(str, start, end) - returns a substring of the str from start to end (exclusive). Supports negative indices to count from the end of the array.\n- parse_json(json_str) - Only takes a string, never pass a map or struct\n- to_json_string(json_variant) - Only takes a variant. To pass other types, cast to variant with `AS variant`\n- typeof(x)\n- coalesce(x[]) - returns the first non-null value from a list of expressions. All arguments must be type-compatible (e.g. all strings, all ints). To coalesce a variant with a default string, cast the variant first: `coalesce(my_variant AS string, '')`\n- first(x)\n- last(x)\n- now()\n- today()\n- yesterday()\n- tomorrow()\n- ts(str)\n- at_timezone(timestamp, timezone)\n- year(timestamp)\n- month(timestamp)\n- day(timestamp)\n- hour(timestamp)\n- minute(timestamp)\n- second(timestamp)\n- from_unixtime_micros(int) - returns timestamp\n- from_unixtime_millis(int) - returns timestamp\n- from_unixtime_nanos(int) - returns timestamp\n- from_unixtime_seconds(int) - returns timestamp\n- from_millis(x) - returns interval\n- from_nanos(x) - returns interval\n- to_unixtime(timestamp) - returns double in seconds since the Unix epoch\n- to_millis(interval)\n- to_nanos(interval)\n- map(keys, values)\n- map(elements)\n- map_keys(map)\n- map_values(map)\n- next(expression)\n- cidr_contains(cidr, ip)\n- is_ipv4(ip)\n- is_ipv6(ip)\n\nThe following functions are ONLY available within an AGG or WINDOW command (not within LET, SELECT, or WHERE commands):\n\n- all(bool_expr) - returns true if bool_expr is true for all rows\n- any(bool_expr) - returns true if bool_expr is true for any row\n- any_value(x) - returns an arbitrary value from each group\n- approx_percentile(x, percentile)\n- array_agg(x)\n- 
avg(x)\n- count_distinct(x) - counts the number of distinct non-null values of x\n- approx_distinct(x) - same as count_distinct(x) but uses an approximation algorithm that is faster but less accurate\n- count_if(condition) - counts all rows where condition is true\n- count(expr) - counts all rows where expr is non-null\n- count() - counts all rows\n- map_agg(key, value)\n- max(x)\n- min(x)\n- multimap_agg(key, value)\n- stddev(x)\n- sum(x)\n\nThe following functions are ONLY available within a WINDOW command (not within LET, SELECT, WHERE, or AGG commands):\n\n- first_value(expression)\n- last_value(expression)\n- nth_value(expression, n)\n- cume_dist() - the number of rows with values less than or equal to the current row's value, divided by the total number of rows in the partition\n- percent_rank() - the percentile rank of each row within the window partition\n- dense_rank() - the rank of each row within a window partition without gaps\n- lag(expression, offset, ignore_nulls) - the value of an expression from a previous row within the window\n- lead(expression, offset, ignore_nulls) - the value of an expression from a subsequent row within the window\n- rank() - the rank of each row within a window partition with gaps\n- row_number() - a sequential row number for each row within a window partition\n\n### Describing time ranges\n\nUse `<low>..<high>` to describe a time range. Drop either `low` or `high` from the expression to communicate that side being unbounded. Allow the use of interval values, like `WITHIN 1hr..`. This time range syntax is used in the `timeRange` input of tools such as `execute-hamelin-query` and `query-page-run-query` to filter the result set to only the rows that have a `timestamp` in that time range. The time range syntax can be used in various parts of a Hamelin query:\n\n- `| WHERE val IN low..high` allows `IN` to be used as a binary boolean operator to test any sortable value against a range. 
This can be used anywhere boolean expressions are needed.\n- `| WINDOW WITHIN low..high` defines the frame clause of a window expression, filtering only those rows which are within the interval defined in terms of the current (or output) row.\n\nWhenever providing timestamp literals, you must use a format of `('<TIMESTAMP>' AS timestamp)`. For example, to describe noon on March 1st, 2020, use `('2020-03-01 12:00' AS timestamp)`. To select all rows from table A with a timestamp later than noon on March 1st, 2025, you can use:\n<example>\nFROM A\n| WHERE timestamp > ('2025-03-01 12:00' AS timestamp)\n</example>\n\nTo select rows from the last hour, use `timeRange`:\n\n<example>\n-1h..\n</example>\n\nTo select rows which were between two and one hour ago, use the `timeRange`:\n\n<example>\n-2h..-1h\n</example>\n\nTo select rows which occurred during the past 7 days, use the `timeRange`:\n\n<example>\n-1w..\n</example>\n\nThe `@` operator can also be used on a timestamp to round timestamps down to the nearest unit (day, hour, etc.).\n\nTo select rows in the previous 5 calendar days (not including today), use the `timeRange`:\n\n<example>\n-5d@d..now()@d\n</example>\n\n### Windows\n\n- `WINDOW` is a command that behaves like a mix of `AGG` and `LET`. It has three optional arguments that come at the end, all of which define a window:\n - `BY <column1, column2>` means everything in the let is partitioned by this. (e.g., `BY user` means compute the function in a user-relative window).\n - `SORT <column1, column2, ...>` means everything in the let is sorted by this. (e.g., `SORT timestamp`).\n - `WITHIN <range>` means everything in the let uses this range as the frame. 
(e.g., `WITHIN -5m` means within a 5 minute sliding window).\n - Here, within uses nearly the same semantics as the `WITHIN` command.\n - We introduce a new row interval literal with the suffix `r` or `rows` or `row` for defining ranges over exact numbers of rows rather than over ranges of values.\n- All three are optional, but the presence of any means _every_ expression in the let is defined \"over\" this window.\n\nFor example, to do count in 5 minute windows in table tbl:\n\n<example>\nFROM tbl | WINDOW c = count() WITHIN -5m\n</example>\n\n#### Rate-Based Detection with WINDOW...WITHIN\n\nUse `WINDOW ... WITHIN` for sliding window aggregations to detect bursts or spikes of activity.\n\n**When to use:** Count/aggregate events within a moving time window (e.g., \"10+ failed logins in the last 5 minutes\")\n\n**Key characteristics:**\n- **Purpose**: Detect bursts/spikes of activity (rate-based detection)\n- **Syntax**: `WINDOW ... BY grouping_keys WITHIN -duration`\n- **Window moves with time**: Always looks at the most recent N minutes/hours\n- **Use cases**: Rate limiting, burst detection, threshold alerting, brute force attacks\n- **Duration format**: `-5min`, `-1h`, `-1d` (negative values look backward in time)\n\n**Example: Brute Force Detection**\n\n<example>\nFROM simba.okta_events\n| WHERE event.action == \"user.session.start\" AND event.outcome == \"failure\"\n| WINDOW\n failed_login_count = count(),\n unique_ips = array_agg(source.ip)\n BY user.name\n WITHIN -5min\n| WHERE failed_login_count >= 10\n| SELECT\n timestamp,\n user.name,\n event.count = failed_login_count,\n source.ip_count = unique_ips\n</example>\n\n**Common WINDOW patterns:**\n\n<example>\n// Count events in sliding window\nFROM dataset | WINDOW event_count = count() BY host WITHIN -10min\n</example>\n\n<example>\n// Conditional counting\nFROM dataset\n| WINDOW\n malicious_count = count_if(is_malicious),\n total_count = count()\n BY user.name\n WITHIN -30min\n</example>\n\n### Event 
Correlation with MATCH\n\nThe `MATCH` command enables powerful multi-stage detection by correlating different event patterns. This is essential for detecting attack chains and reducing false positives.\n\n**When to use MATCH:**\n- Detecting multi-stage attacks (initial access \u2192 execution \u2192 persistence)\n- Correlating different behavioral indicators\n- Building high-fidelity detections that require multiple suspicious events\n- Creating attack chain detections with context\n\n**MATCH Pattern Structure:**\n\n```hamelin\n// Step 1: Define individual event patterns as WITH clauses\nWITH pattern1 = FROM dataset\n| WHERE <specific conditions>\n| SELECT fields...\n\nWITH pattern2 = FROM dataset\n| WHERE <different conditions>\n| SELECT fields...\n\n// Step 2: Correlate patterns using MATCH (space-separated!)\nMATCH p1=pattern1? p2=pattern2?\nBY correlation_key // e.g., host, user, session_id\n| LET indicator_count = (\n if(p1 IS NOT NULL, 1, 0) +\n if(p2 IS NOT NULL, 1, 0)\n) AS int\n| WHERE indicator_count >= 2 // Alert when 2+ patterns match\n\n// Step 3: Aggregate and enrich\n| AGG\n timestamp = min(timestamp),\n event.start = min(timestamp),\n event.end = max(timestamp),\n pattern1_event = array_agg(p1)[0],\n pattern2_event = array_agg(p2)[0],\n indicator_count = max(indicator_count)\n BY match_number, correlation_key\n\n// Step 4: Apply time window constraint\n| WHERE event.end - event.start <= 10min\n\n// Step 5: Build output with risk scoring\n| LET risk_score = (\n if(pattern1_event IS NOT NULL, 30, 0) +\n if(pattern2_event IS NOT NULL, 30, 0)\n) AS int\n| SELECT timestamp, fields...\n```\n\n**Critical MATCH Syntax Rules:**\n- Pattern aliases are **space-separated**, NOT comma-separated: `MATCH a=query1? b=query2? 
c=query3?`\n- Use `?` quantifier to make patterns optional (allows partial matches)\n- The `BY` clause specifies the correlation key (what ties events together)\n- Access matched events in AGG using `array_agg(alias)[0]`\n- Time window filtering: `WHERE event.end - event.start <= duration`\n\n**Example: Multi-Stage Malware Detection**\n\n<example>\n// Define behavioral indicators\nWITH lnk_powershell = FROM simba.sysmon_events\n| WHERE event.code == \"1\"\n| LET parent_image = coalesce(winlog.event_data[\"ParentImage\"], '') AS string\n| LET image = coalesce(winlog.event_data[\"Image\"], '') AS string\n| LET cmd_line = coalesce(winlog.event_data[\"CommandLine\"], '') AS string\n| LET host = host.name AS string\n| WHERE regexp_like(lower(parent_image), '(?i).*\\\\\\\\explorer\\\\.exe')\n AND regexp_like(lower(image), '(?i).*(powershell|pwsh)\\\\.exe')\n AND regexp_like(cmd_line, '(?i).*(hidden|-enc|-encodedcommand).*')\n| SELECT timestamp, host, process_image = image, process_commandline = cmd_line\n\nWITH cloud_download = FROM simba.sysmon_events\n| WHERE event.code == \"1\"\n| LET image = coalesce(winlog.event_data[\"Image\"], '') AS string\n| LET cmd_line = coalesce(winlog.event_data[\"CommandLine\"], '') AS string\n| LET host = host.name AS string\n| WHERE regexp_like(lower(image), '(?i).*(powershell|pwsh)\\\\.exe')\n AND regexp_like(cmd_line, '(?i).*(github\\\\.com|dropbox\\\\.com).*')\n AND regexp_like(cmd_line, '(?i).*(downloadstring|invoke-webrequest).*')\n| SELECT timestamp, host, process_image = image, process_commandline = cmd_line\n\n// Correlate: alert when 2+ indicators on same host within 10 min\nMATCH lnk=lnk_powershell? 
dl=cloud_download?\nBY host\n| LET indicator_count = (\n if(lnk IS NOT NULL, 1, 0) +\n if(dl IS NOT NULL, 1, 0)\n) AS int\n| WHERE indicator_count >= 2\n| AGG\n timestamp = min(timestamp),\n event.start = min(timestamp),\n event.end = max(timestamp),\n lnk_event = array_agg(lnk)[0],\n dl_event = array_agg(dl)[0],\n indicator_count = max(indicator_count)\n BY match_number, host\n| WHERE event.end - event.start <= 10min\n| LET risk_score = (\n if(lnk_event IS NOT NULL, 40, 0) +\n if(dl_event IS NOT NULL, 40, 0)\n) AS int\n| SELECT\n timestamp,\n event.start,\n event.end,\n host,\n event.count = indicator_count,\n event.risk_score = risk_score\n</example>\n\n### UNNEST and EXPLODE\n\nThe UNNEST command lifts struct or array of struct fields into the parent or enclosing result set. When given a struct, it lifts struct fields into the parent struct without changing row cardinality. When given an array of struct, it performs an explode operation followed by unnesting, creating one row per array element with the struct fields lifted into the parent struct.\n\nThe EXPLODE command transforms rows containing array fields into multiple rows, with each element of the array becoming a separate row. Each array element becomes a new row with all other fields from the original row preserved in each generated output row. When you use assignment syntax (`identifier = expression`), the exploded values are placed in the specified field name. 
Without assignment syntax, the exploded values replace the original array field.\n\n### More valid examples\n\nTo find records in table foo where column c1 is greater than 42, one needs to write:\n\n<example>\nFROM foo | WHERE c1 > 42\n</example>\n\nTo take unique (distinct) tuples of hostname and dest_port in table1, you can use AGG without aggregation functions:\n<example>\nFROM table1 | AGG BY hostname, dest_port\n</example>\n\nTo get number of flows by protocol and destination_port, then select top 10 by count, one needs to write:\n<example>\nFROM flows\n| AGG count = count() BY protocol, destination_port\n| SORT count DESC\n| LIMIT 10\n</example>\n\nTo get number of flows by day, one needs to write:\n<example>\nFROM flows\n| AGG count = count() BY timestamp@d\n| SORT count DESC\n| LIMIT 10\n</example>\n\nThe resulting data will have two columns: count and timestamp (note that the \"@d\" part gets removed in the outputted column name).\n\n### Invalid usage examples\n\nHere are some examples how NOT TO USE Hamelin. 
It is important to memorize these bad examples and verify when producing a response to the prompt if the output is correct.\n\nFollowing bad query mistakenly uses ORDER BY rather than SORT:\n<bad_example>\nFROM flows\n| ORDER BY count() DESC\n</bad_example>\nAfter fixing:\n<example>\nFROM flows\n| SORT count() DESC\n</example>\n\nFollowing bad query uses aggregation for sorting, but it was already done\n<bad_example>\nFROM flows\n| AGG count() BY user_name\n| SORT count()\n</bad_example>\nAfter fixing:\n<example>\nFROM flows\n| AGG count = count() BY user_name\n| SORT count\n</example>\n\nFollowing bad query mistakenly uses a single equal sign for equality test:\n<bad_example>\nFROM flows\n| WHERE user = 'John Smith'\n</bad_example>\nAfter fixing:\n<example>\nFROM flows\n| WHERE user == 'John Smith'\n</example>\n\nFollowing bad query mistakenly uses a star character in count:\n<bad_example>\nFROM flows\n| AGG count = count(*) BY user\n</bad_example>\nAfter fixing:\n<example>\nFROM flows\n| AGG count = count() BY user\n</example>\n\nFollowing bad query is mixing two commands in one:\n<bad_example>\nFROM flows | SELECT dest_port, LET proto = 'tcp'\n</bad_example>\nAfter fixing:\n<example>\nFROM flows | SELECT dest_port | LET proto = 'tcp'\n</example>\n\nFollowing bad query is using incorrect AS operator for aliasing in AGG:\n<bad_example>\nFROM flows | AGG max(timestamp) AS max_timestamp\n</bad_example>\nAfter fixing:\n<example>\nFROM flows | AGG max_timestamp = max(timestamp)\n</example>\n\nFollowing bad query uses AS for aliasing in SELECT (AS is ONLY for type casting, use `=` for aliasing):\n<bad_example>\nFROM flows | SELECT source_ip AS src, destination_ip AS dst\n</bad_example>\nAfter fixing:\n<example>\nFROM flows | SELECT src = source_ip, dst = destination_ip\n</example>\n\nThe following bad query uses the LIKE operator, which doesn't exist in Hamelin:\n<bad_example>\nFROM flows | WHERE proto LIKE 'http%'\n</bad_example>\nThree different options to 
fix:\n<example>\nFROM flows | WHERE starts_with(proto, 'http')\n</example>\n<example>\nFROM flows | WHERE contains(proto, 'http')\n</example>\n<example>\nFROM flows | WHERE regexp_like(proto, '^http')\n</example>\n\nAGG does not support the WITHIN clause. Use WINDOW for rate-based detection with sliding windows.\n<bad_example>\n| AGG count() BY host WITHIN -5min\n</bad_example>\nAfter fixing:\n<example>\n| WINDOW count() BY host WITHIN -5min\n</example>\n\nMATCH patterns must be space-separated, not comma-separated.\n<bad_example>\nMATCH a=query1?, b=query2?, c=query3?\n</bad_example>\nAfter fixing:\n<example>\nMATCH a=query1? b=query2? c=query3?\n</example>\n\n### Key differences from SQL\n\nHamelin is not SQL but a unique query language! Carefully note the Hamelin rules when drafting a new query.\n\n- The count function does not include a star. So it's `count()` and definitely NOT `count(*)`.\n- There is no `CASE`/`WHEN` operator in Hamelin, but it's implemented as the `case(when_condition: then_expression, when_condition: then_expression, ...)` function\n- There's no `LIKE` operator. Instead, use `contains` to do simple substring searching or `regexp_like` to check if a string matches a regex pattern.\n- String concatenation happens with `+` not `||`\n- Double equals `==` is required for equality comparison. Single equals `=` is only used for assignment.\n- `AS` is used for casting, never for assignment/aliasing. NEVER use it like `SELECT expr AS alias`. Only use it for casting like `SELECT expr AS type`. To do aliasing in a SELECT command, use `SELECT alias = expr` syntax.\n- `SORT`, not `ORDER BY`\n- There is no support for subqueries within expressions (`| WHERE name IN (FROM admins | SELECT name)` is not valid)\n- Trailing commas are allowed (but not required).\n\n### Summary\n\nLet me note a few important properties:\n\n- Aggregation, when used, must be done using the `AGG` keyword. 
The `AGG` command completely redefines the field space so only fields used/defined in the command will be available downstream. This means that if a field isn't used at all in the AGG command, you cannot reference it again in a subsequent command.\n- Grouping, when used, must be done within a window or aggregation, i.e. there is no separate GROUP BY operation, but there is \"AGG .... BY ....\"\n- Each query must start with `FROM <table>`, followed by other operations separated by pipe character (\"|\").\n- Very important, pipe character (\"|\") must always be used to separate each operation. For example, this is invalid: <bad_example>FROM tbl WHERE x > 5</bad_example>. Instead, it should be: <example> FROM tbl | WHERE x > 5</example>. It\u2019s extremely important, use pipe characters for separation!\n- The only valid operation names are FROM, SELECT, WHERE, DROP, LET, WITH, WINDOW, MATCH, SORT, LIMIT, AGG, UNNEST, EXPLODE - each operation must start with one of these.\n- When the prompt contains a SQL query on the input, you must convert it into a Hamelin query!!!\n\n### Best Practices\n\n- Don't use `SORT` and `LIMIT` commands unless the specific situation requires them or the user asks for it specifically. If you're adding `| SORT timestamp DESC` to the end of the query, that's almost always wrong.\n- `SELECT` is useful to narrow the result set if you and the user know exactly what you're looking for, but `SELECT` is not required and should be omitted for most queries.\n\n### Final Reminders\n\nNow you should understand how Hamelin operates and how to write queries. When providing Hamelin queries, make sure you take into account the syntax requirements of Hamelin and use logical operators correctly. Ensure that the order of operations as logically described in the question is followed. Ensure that the order of aggregating and filtering operations is correct. When asked to provide a Hamelin query, provide concise output with a correct Hamelin query.\n";
149
159
 
150
160
  type QuerySort = {
151
161
  column: string;