@graphenedata/cli 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/cli.js CHANGED
@@ -555,9 +555,9 @@ function analyzeQuery(queryNode) {
555
555
  isAgg ||= !!isSelectDistinct;
556
556
  selects.forEach((s) => {
557
557
  if (s.getChild("Wildcard")) {
558
- let path7 = s.getChild("Wildcard").getChildren("Identifier");
559
- let pathStrings = path7.map((p) => txt(p));
560
- let target = followJoins(path7, scope.table);
558
+ let path8 = s.getChild("Wildcard").getChildren("Identifier");
559
+ let pathStrings = path8.map((p) => txt(p));
560
+ let target = followJoins(path8, scope.table);
561
561
  if (!target) return;
562
562
  target.fields.forEach((f) => {
563
563
  if (isJoin(f) || f.isAgg) return;
@@ -655,8 +655,8 @@ function analyzeExpression(expr, scope) {
655
655
  if (scope.outputFields.includes(field) && field.isAgg) {
656
656
  return { node: "outputField", name: field.name, ...typeInfo, isAgg: field.isAgg };
657
657
  }
658
- let path7 = expr.getChildren("Identifier").map((i) => txt(i));
659
- return { node: "field", path: path7, ...typeInfo, isAgg: field.isAgg };
658
+ let path8 = expr.getChildren("Identifier").map((i) => txt(i));
659
+ return { node: "field", path: path8, ...typeInfo, isAgg: field.isAgg };
660
660
  }
661
661
  case "ExtractExpression": {
662
662
  let e = analyzeExpression(expr.getChild("Expression"), scope);
@@ -1215,11 +1215,11 @@ async function loadWorkspace(dir, includeMd) {
1215
1215
  updateFile(contents, file);
1216
1216
  }
1217
1217
  }
1218
- function updateFile(contents, path7) {
1219
- FILE_MAP[path7] ||= { path: path7, contents, tree: null, tables: [], queries: [] };
1220
- FILE_MAP[path7].contents = contents;
1221
- FILE_MAP[path7].tree = null;
1222
- return FILE_MAP[path7];
1218
+ function updateFile(contents, path8) {
1219
+ FILE_MAP[path8] ||= { path: path8, contents, tree: null, tables: [], queries: [] };
1220
+ FILE_MAP[path8].contents = contents;
1221
+ FILE_MAP[path8].tree = null;
1222
+ return FILE_MAP[path8];
1223
1223
  }
1224
1224
  function analyze(contents, type) {
1225
1225
  clearDiagnostics();
@@ -1294,6 +1294,11 @@ var init_bigQuery = __esm({
1294
1294
  BigQueryConnection = class {
1295
1295
  client;
1296
1296
  constructor(options = {}) {
1297
+ if (process.env.GOOGLE_CREDENTIALS_CONTENT) {
1298
+ let parsed = JSON.parse(process.env.GOOGLE_CREDENTIALS_CONTENT);
1299
+ options.projectId = parsed.project_id;
1300
+ options.credentials = parsed;
1301
+ }
1297
1302
  options.projectId ||= config.googleProjectId;
1298
1303
  options.maxRetries ||= 3;
1299
1304
  options.userAgent ||= "Graphene";
@@ -1385,6 +1390,96 @@ var init_connections = __esm({
1385
1390
  }
1386
1391
  });
1387
1392
 
1393
+ // mdCompile.ts
1394
+ import fs4 from "fs";
1395
+ import path5 from "path";
1396
+ import { visit } from "unist-util-visit";
1397
+ import sanitizeHtml from "sanitize-html";
1398
+ function extractQueries() {
1399
+ function escapeHtml(str) {
1400
+ return str.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
1401
+ }
1402
+ return function transformer(tree) {
1403
+ visit(tree, "code", (node, index, parent) => {
1404
+ if (index === null) return;
1405
+ let name = typeof node.meta === "string" ? node.meta : "";
1406
+ let code = typeof node.value === "string" ? node.value.trim() : "";
1407
+ parent.children[index] = { type: "html", value: `<GrapheneQuery name="${escapeHtml(name)}" code="${escapeHtml(code)}" />` };
1408
+ });
1409
+ };
1410
+ }
1411
+ function escapeAngles() {
1412
+ return function transformer(tree) {
1413
+ visit(tree, "text", (node) => {
1414
+ if (!node.value || typeof node.value !== "string") return;
1415
+ if (!node.value.includes("<")) return;
1416
+ node.value = node.value.replace(/</g, "&lt;");
1417
+ });
1418
+ };
1419
+ }
1420
+ function sanitizeMarkdown() {
1421
+ return function transformer(tree) {
1422
+ visit(tree, "raw", (node) => {
1423
+ if (typeof node.value !== "string") return;
1424
+ let expanded = node.value.replace(/<(\w+)((?:\s[^<>]*?)?)\s*\/>/gi, (_, name, attrs = "") => {
1425
+ let spacing = attrs;
1426
+ return `<${name}${spacing}></${name}>`;
1427
+ });
1428
+ let sanitized = sanitizeHtml(expanded, {
1429
+ ...sanitizeHtml.defaults,
1430
+ allowedTags: [
1431
+ ...sanitizeHtml.defaults.allowedTags,
1432
+ ...componentNames()
1433
+ ],
1434
+ allowedAttributes: {
1435
+ ...sanitizeHtml.defaults.allowedAttributes,
1436
+ ...Object.fromEntries(componentNames().map((n) => [n, ["*"]]))
1437
+ },
1438
+ parser: {
1439
+ ...sanitizeHtml.defaults.parser || {},
1440
+ lowerCaseAttributeNames: false,
1441
+ lowerCaseTags: false
1442
+ }
1443
+ });
1444
+ node.value = sanitized;
1445
+ });
1446
+ };
1447
+ }
1448
+ function injectComponentImports() {
1449
+ let imp = `const {${componentNames().join(", ")}} = window.$GRAPHENE.components`;
1450
+ return {
1451
+ markup: ({ content, filename }) => {
1452
+ if (!filename.endsWith(".md")) return;
1453
+ if (content.includes("<script>")) {
1454
+ content = content.replace("<script>", `<script>
1455
+ ${imp}`);
1456
+ } else {
1457
+ content = `<script>
1458
+ ${imp}
1459
+ </script>
1460
+ ${content}`;
1461
+ }
1462
+ return { code: content };
1463
+ },
1464
+ style: () => {
1465
+ },
1466
+ script: () => {
1467
+ }
1468
+ };
1469
+ }
1470
+ function componentNames() {
1471
+ if (cachedComponentNames) return cachedComponentNames;
1472
+ let files = fs4.readdirSync(path5.join(import.meta.dirname, "../ui/components"));
1473
+ cachedComponentNames = files.map((f) => path5.basename(f, ".svelte")).filter((f) => !f.startsWith("_"));
1474
+ return cachedComponentNames || [];
1475
+ }
1476
+ var cachedComponentNames;
1477
+ var init_mdCompile = __esm({
1478
+ "mdCompile.ts"() {
1479
+ cachedComponentNames = null;
1480
+ }
1481
+ });
1482
+
1388
1483
  // serve2.ts
1389
1484
  var serve2_exports = {};
1390
1485
  __export(serve2_exports, {
@@ -1393,19 +1488,18 @@ __export(serve2_exports, {
1393
1488
  });
1394
1489
  import { createServer, optimizeDeps } from "vite";
1395
1490
  import { svelte, vitePreprocess } from "@sveltejs/vite-plugin-svelte";
1396
- import { visit } from "unist-util-visit";
1397
- import fs4 from "fs-extra";
1491
+ import fs5 from "fs-extra";
1398
1492
  import crypto from "crypto";
1399
1493
  import { mdsvex } from "mdsvex";
1400
- import path5 from "path";
1494
+ import path6 from "path";
1401
1495
  import { fileURLToPath as fileURLToPath2 } from "url";
1402
1496
  import { WebSocketServer } from "ws";
1403
1497
  import { spawn as spawn2 } from "child_process";
1404
1498
  async function serve2() {
1405
1499
  grapheneRoot = config.root;
1406
- uiRoot = path5.join(fileURLToPath2(import.meta.url), "../../ui");
1407
- await fs4.ensureDir(path5.resolve(grapheneRoot, "node_modules/.graphene"));
1408
- await fs4.writeFile(path5.resolve(grapheneRoot, `node_modules/.graphene/${process.env.NODE_ENV == "test" ? "test" : "serve"}.pid`), String(process.pid));
1500
+ uiRoot = path6.join(fileURLToPath2(import.meta.url), "../../ui");
1501
+ await fs5.ensureDir(path6.resolve(grapheneRoot, "node_modules/.graphene"));
1502
+ await fs5.writeFile(path6.resolve(grapheneRoot, `node_modules/.graphene/${process.env.NODE_ENV == "test" ? "test" : "serve"}.pid`), String(process.pid));
1409
1503
  let server = await createServer({
1410
1504
  root: config.root,
1411
1505
  plugins: [
@@ -1415,8 +1509,8 @@ async function serve2() {
1415
1509
  vitePreprocess(),
1416
1510
  mdsvex({
1417
1511
  extensions: [".md"],
1418
- remarkPlugins: [extractQueries],
1419
- layout: path5.resolve(uiRoot, "layout.svelte")
1512
+ remarkPlugins: [extractQueries, escapeAngles],
1513
+ rehypePlugins: [sanitizeMarkdown]
1420
1514
  }),
1421
1515
  injectComponentImports()
1422
1516
  ]
@@ -1432,7 +1526,7 @@ async function serve2() {
1432
1526
  },
1433
1527
  resolve: {
1434
1528
  alias: {
1435
- graphene: path5.resolve(uiRoot, "web.js")
1529
+ graphene: path6.resolve(uiRoot, "web.js")
1436
1530
  }
1437
1531
  }
1438
1532
  });
@@ -1497,7 +1591,7 @@ async function handlePage(server, res, filePath, mount) {
1497
1591
  res.setHeader("Content-Type", "text/html");
1498
1592
  let mdMount = mount ? `
1499
1593
  import Page from ${JSON.stringify(filePath)};
1500
- new Page({ target: document.getElementById('app'), props: {} })
1594
+ new Page({ target: document.getElementById('content'), props: {} })
1501
1595
  ` : "";
1502
1596
  let html = await server.transformIndexHtml(filePath, `<!doctype html>
1503
1597
  <html lang="en">
@@ -1511,7 +1605,9 @@ async function handlePage(server, res, filePath, mount) {
1511
1605
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@100..900&display=swap" rel="stylesheet">
1512
1606
  </head>
1513
1607
  <body>
1514
- <div id="app"></div>
1608
+ <main>
1609
+ <div id="content"></div>
1610
+ </main>
1515
1611
  <script type="module">
1516
1612
  // do this first so we can track errors caused by importing the md file
1517
1613
  import 'graphene'
@@ -1523,34 +1619,6 @@ async function handlePage(server, res, filePath, mount) {
1523
1619
  </html>`);
1524
1620
  return res.end(html);
1525
1621
  }
1526
- function extractQueries() {
1527
- function escapeHtml(str) {
1528
- return str.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
1529
- }
1530
- return function transformer(tree) {
1531
- visit(tree, "code", (node, index, parent) => {
1532
- if (index === null) return;
1533
- parent.children[index] = { type: "html", value: `<GrapheneQuery name="${escapeHtml(node.meta)}" code="${escapeHtml(node.value.trim())}" />` };
1534
- });
1535
- };
1536
- }
1537
- function injectComponentImports() {
1538
- let files = fs4.readdirSync(path5.join(uiRoot, "components"));
1539
- let componentNames = files.map((f) => path5.basename(f, ".svelte")).filter((f) => !f.startsWith("_"));
1540
- let imp = `const {${componentNames.join(", ")}} = window.$GRAPHENE.components`;
1541
- return {
1542
- markup: ({ content, filename }) => {
1543
- if (!filename.endsWith(".md")) return;
1544
- content = content.replace("<script>", `<script>
1545
- ${imp}`);
1546
- return { code: content };
1547
- },
1548
- style: () => {
1549
- },
1550
- script: () => {
1551
- }
1552
- };
1553
- }
1554
1622
  function mockFilesForTests() {
1555
1623
  if (process.env.NODE_ENV !== "test") return null;
1556
1624
  return {
@@ -1570,6 +1638,7 @@ var init_serve2 = __esm({
1570
1638
  "serve2.ts"() {
1571
1639
  init_core();
1572
1640
  init_connections();
1641
+ init_mdCompile();
1573
1642
  updateWorkspacePlugin = {
1574
1643
  name: "updateWorkspace",
1575
1644
  configureServer: (s) => {
@@ -1611,8 +1680,8 @@ var init_serve2 = __esm({
1611
1680
  if (pathName == "/graphene/view") return await handleView(req, res);
1612
1681
  if (pathName == "/__ct") return await handlePage(s, res, "__ct", false);
1613
1682
  if (!pathName || pathName == "/") pathName = "index";
1614
- let mdPath = path5.join(grapheneRoot, pathName + ".md");
1615
- if (await fs4.exists(mdPath)) {
1683
+ let mdPath = path6.join(grapheneRoot, pathName + ".md");
1684
+ if (await fs5.exists(mdPath)) {
1616
1685
  await handlePage(s, res, mdPath, true);
1617
1686
  } else {
1618
1687
  next();
@@ -1694,8 +1763,8 @@ function printTable(rows) {
1694
1763
  // cli.ts
1695
1764
  init_core();
1696
1765
  init_config();
1697
- import fs5 from "fs-extra";
1698
- import path6 from "path";
1766
+ import fs6 from "fs-extra";
1767
+ import path7 from "path";
1699
1768
  import os from "os";
1700
1769
 
1701
1770
  // background.ts
@@ -1848,9 +1917,9 @@ program.command("view").description("Capture a screenshot of a rendered markdown
1848
1917
  }
1849
1918
  if (result.screenshot) {
1850
1919
  let filename = `graphene-screenshot-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}.png`;
1851
- let screenshotPath = path6.join(os.tmpdir(), filename);
1920
+ let screenshotPath = path7.join(os.tmpdir(), filename);
1852
1921
  let base64Data = result.screenshot.replace(/^data:image\/png;base64,/, "");
1853
- await fs5.writeFile(screenshotPath, base64Data, "base64");
1922
+ await fs6.writeFile(screenshotPath, base64Data, "base64");
1854
1923
  console.log("Screenshot saved to", screenshotPath);
1855
1924
  }
1856
1925
  });
@@ -1865,9 +1934,9 @@ async function readInput(arg) {
1865
1934
  process.stdin.resume();
1866
1935
  });
1867
1936
  }
1868
- let absolutePath = path6.resolve(arg);
1869
- if (fs5.existsSync(absolutePath)) {
1870
- return await fs5.promises.readFile(absolutePath, "utf-8");
1937
+ let absolutePath = path7.resolve(arg);
1938
+ if (fs6.existsSync(absolutePath)) {
1939
+ return await fs6.promises.readFile(absolutePath, "utf-8");
1871
1940
  }
1872
1941
  return arg;
1873
1942
  }
@@ -1,86 +1,410 @@
1
1
  # How to develop in Graphene
2
2
 
3
- Graphene is a framework for building semantic layers and data visualizations in code. Graphene projects are comprised of:
3
+ Graphene is a framework for data analysis, semantic modeling, and data visualization in code. Graphene projects are comprised of:
4
4
  - .gsql files that define semantics-enriched tables (aka semantic models)
5
- - .md files that define data apps (dashboards)
5
+ - .md files that define data apps (aka dashboards)
6
6
 
7
7
  Graphene also has a CLI that lets you check syntax, run queries, serve data apps, and more.
8
8
 
9
9
  ## Graphene SQL (GSQL)
10
10
 
11
- ### Tables
12
- Tables have to be declared first before they can be queried. A table in Graphene has the added concept of _semantics_. Semantics are stored expressions and join relationships associated with a table that `select` queries can leverage. This allows query logic to be centralized, reusable, and more easily governed.
11
+ GSQL is comprised of `table` statements that declare tables and `select` statements that query them.
13
12
 
14
- Here's an example:
13
+ ### `table` statements
15
14
 
16
- ```gsql
15
+ `table` statements manifest tables that already exist in your database. Here's an example of two tables, `orders` and `users`, in GSQL.
16
+
17
+ ```sql
17
18
  table orders (
19
+
20
+ /* Base columns */
21
+
18
22
  id BIGINT primary_key,
19
23
  user_id BIGINT,
20
24
  created_at DATETIME,
21
- amount FLOAT, -- paid by customer #units=usd
22
- cost FLOAT, -- cost of materials #units=usd
25
+ status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
26
+ amount FLOAT, -- Amount paid by customer
27
+ cost FLOAT, -- Cost of materials
28
+
29
+ /* Join relationships */
23
30
 
24
31
  join_one users on user_id = users.id,
25
32
 
26
- sum(amount) as revenue,
27
- sum(amount - cost) as profit,
33
+ /* Scalar expressions */
34
+
35
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
36
+
37
+ /* Agg expressions */
38
+
39
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
40
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
41
+ revenue - cogs as profit,
28
42
  profit / revenue as profit_margin
29
- );
43
+ )
30
44
 
31
45
  table users (
32
46
  id BIGINT primary_key,
33
47
  name VARCHAR,
34
48
  email VARCHAR,
35
49
  age INTEGER,
50
+ country_code VARCHAR,
36
51
 
37
52
  join_many orders on id = orders.user_id
38
- );
53
+ )
39
54
  ```
40
55
 
41
- Syntax notes
42
- - `table foo (...)` defines a Graphene table based on the database table `foo`.
43
- - The allowed join types are `join_one` and `join_many`. All joins are left outer joins. There is no inner, right, or cross join.
56
+ We can break down a table statement into three parts: [base columns](#base-columns-required), [join relationships](#join-relationships), and [stored expressions](#stored-expressions) (aka dimensions and measures).
57
+
58
+ #### Base columns (required)
59
+
60
+ The base column set is simply a reflection of the underlying database table's schema. Similar to `create table` statements in regular SQL DDL, you list each column's name and data type. One column must be designated as the primary key.
61
+
62
+ #### Join relationships
63
+
64
+ Join relationships in a `table` statement declare joins that can be used when querying that table. This makes query writing easier and more foolproof. See [Using join relationships in queries](#using-join-relationships-in-queries) below for how to use modeled joins in queries.
65
+
66
+ The other main difference between joins in GSQL and regular SQL is that you have to specify whether there are many rows in the left table for each row in the right table, or vice versa. This additional bit of information allows Graphene to prevent incorrect aggregation as a result of row duplication (aka fan-out) through joins. See [Safe aggregation in fan-outs](#safe-aggregation-in-fan-outs) for more details.
67
+
68
+ This information is provided with the two supported join types, `join_one` and `join_many`:
44
69
  - `join_one` is used if there are many rows in the **left** table for each row in the **right** table.
45
70
  - `join_many` is used if there are many rows in the **right** table for each row in the **left** table.
46
- - Join names within a table must be unique. Polymorphic relationships (eg., where there are multiple relationships between the same two tables on different keys) are allowed but must be aliased eg. `join_one users as owner on user_id = owner.id` and `join_one users as viewer on user_id = viewer.id`.
47
- - Comments in tables can provide descriptions as well as metadata (denoted by `#` inside the comment).
48
71
 
49
- Best practices
50
- - For a given table, only model joins that are directly on that table. Graphene will automatically traverse multi-hop joins when it compiles the collective table space.
51
- - A join between two tables should be modeled in both the respective `table` statements. This may seem redundant but it offers more flexibility for queries to choose which table to set in the `from` (remember that direction matters since all joins are left joins).
72
+ In the example above with `orders` and `users`, the joins confirm that there are many orders per user, and only one user per order.
73
+
74
+ Note that all joins in GSQL are left outer joins. There is no inner, right, or cross join.
75
+
76
+ ##### Multiple join relationships between the same two tables
52
77
 
53
- ### Queries
54
- Graphene tables can be queried using `select` statements. Here are some example queries on the tables above:
78
+ Sometimes there are multiple valid ways to join two tables together. You can model this in Graphene by aliasing the various joins with `as`, just as you would in normal SQL. For example:
55
79
 
80
+ ```sql
81
+ table projects (
82
+ ...
83
+ owner_id BIGINT,
84
+ viewer_id BIGINT,
85
+
86
+ join_one users as project_owner on owner_id = project_owner.id,
87
+ join_one users as project_viewer on viewer_id = project_viewer.id
88
+ )
89
+
90
+ table users (
91
+ ...
92
+ id BIGINT,
93
+
94
+ join_many projects as projects_as_owner on id = projects_as_owner.owner_id,
95
+ join_many projects as projects_as_viewer on id = projects_as_viewer.viewer_id
96
+ )
56
97
  ```
57
- -- top 10 customers by profit
58
- from orders select
59
- users.name, -- notice how we can access the joined table without a join here
60
- profit -- this expands into the stored expression defined in the table
98
+
99
+ ##### Best practices for modeling join relationships
100
+
101
+ - For a given `table` statement, only model joins that are directly on that table. Multi-hop join paths do not need to be written explicitly in order for queries to traverse them.
102
+ - A join between two tables should be modeled in both the respective `table` statements. This may seem redundant but it offers more flexibility for queries to choose which table to set in the `from` (remember that direction matters in queries since all joins are left joins).
103
+
104
+ #### Stored expressions
105
+
106
+ **Stored expressions** are GSQL expressions (ie. any arbitrary combination of functions, operators, and column references) that you want to make reusable to queries. Stored expressions are great for canonizing metrics, segments, and other important business definitions.
107
+
108
+ A stored expression must be given a name via `as`. It can then be referenced by name in queries that use the parent table. See [Using stored expressions in queries](#using-stored-expressions-in-queries) below for how to use stored expressions in queries.
109
+
110
+ Like expressions in regular SQL, expressions in GSQL are either scalar or aggregative. In BI parlance, these would be called dimensions and measures, respectively.
111
+
112
+ Expressions can refer to other expressions, as shown below.
113
+
114
+ ```sql
115
+ table orders (
116
+ ...
117
+
118
+ /* Scalar expressions */
119
+
120
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
121
+
122
+ /* Agg expressions */
123
+
124
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
125
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
126
+ revenue - cogs as profit, -- even though there are no agg functions here, this is still aggregative as it references other aggregative expressions
127
+ profit / revenue as profit_margin
128
+ )
129
+ ```
130
+
131
+
132
+ ### `select` statements
133
+
134
+ `select` is how you write queries in Graphene SQL. It behaves similarly to regular SQL except in the following ways:
135
+ - It can invoke join relationships and stored expressions from `table` statements.
136
+ - It prevents users from accidentally aggregating incorrectly through joins.
137
+
138
+ These differences are described in the sections below.
139
+
140
+ #### Using join relationships in queries
141
+
142
+ If a `table` has join relationships declared in it, a `select` query on that table can leverage those joins without needing to write its own join statement. This is helpful for query writers who have not memorized all the correct join keys.
143
+
144
+ If you recall the model from before:
145
+
146
+ ```sql
147
+ table orders (
148
+ id BIGINT primary_key,
149
+ user_id BIGINT,
150
+ created_at DATETIME,
151
+ status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
152
+ amount FLOAT, -- Amount paid by customer
153
+ cost FLOAT, -- Cost of materials
154
+
155
+ join_one users on user_id = users.id,
156
+
157
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
158
+
159
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
160
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
161
+ revenue - cogs as profit,
162
+ profit / revenue as profit_margin
163
+ )
164
+
165
+ table users (
166
+ id BIGINT primary_key,
167
+ name VARCHAR,
168
+ email VARCHAR,
169
+ age INTEGER,
170
+ country_code VARCHAR,
171
+
172
+ join_many orders on id = orders.user_id
173
+ )
174
+ ```
175
+
176
+ We can write a query that leverages the modeled join relationship between `orders` and `users`:
177
+
178
+
179
+ ```sql
180
+ -- Top 10 customers by order count
181
+ select
182
+ users.name, -- Use the dot operator to traverse the modeled join relationship
183
+ count(*)
184
+ from orders -- A join statement here is not needed
185
+ group by 1
61
186
  order by 2 desc
62
187
  limit 10
63
188
  ```
64
189
 
190
+ ##### Multi-hop joins
191
+
192
+ Sometimes you need to access columns or stored expressions in a table that is two or more joins away from the `from` table. To do this, simply use more dot operators to trace the desired join path. For example, say there is another table added to our project, `countries`:
193
+
194
+ ```sql
195
+ table orders (
196
+ ...
197
+
198
+ join_one users on user_id = users.id
199
+ )
200
+
201
+ table users (
202
+ ...
203
+
204
+ join_many orders on id = orders.user_id,
205
+ join_one countries on country_code = countries.code
206
+ )
207
+
208
+ table countries (
209
+ code VARCHAR primary_key,
210
+ name VARCHAR,
211
+ currency VARCHAR,
212
+ free_shipping BOOLEAN,
213
+
214
+ join_many users on code = users.country_code
215
+ )
65
216
  ```
66
- -- average age of customers over time
217
+
218
+ We can write the following query to show the top ten countries by order count:
219
+
220
+ ```sql
221
+ -- Top 10 countries by order count
67
222
  select
68
- month(date),
69
- average(users.age), -- in normal SQL this would fan-out in the join; in Graphene it smartly de-duplicates the fan-out when computing aggregates
223
+ users.countries.name, -- Orders -> Users -> Countries
224
+ count(*)
70
225
  from orders
226
+ group by 1
227
+ order by 2 desc
228
+ limit 10
71
229
  ```
72
230
 
73
- Syntax notes
74
- - Columns and stored expressions from joined tables can be accessed with the dot operator, eg. `users.age` in the example above. Multiple join hops can be traversed with multiple dots, eg. `users.countries.country_code`.
75
- - `join_one` and `join_many` work here, too. This is useful if the join you need has not been modeled already.
76
- - The `from`, `select`, `group by`, and `where` clauses can be written in any order.
77
- - Expressions in `group by` are implicitly selected, so `from orders select avg(amount) group by user_id` is valid.
78
- - `group by all` is implied if aggregate and scalar expressions are both present in the `select`. It can be omitted and the query will still effectively execute the `group by all`.
231
+ #### Using stored expressions in queries
232
+
233
+ A stored expression can be invoked in a query by simply referencing it by name.
234
+
235
+ Again, using the model from before:
236
+
237
+ ```sql
238
+ table orders (
239
+ id BIGINT primary_key,
240
+ user_id BIGINT,
241
+ created_at DATETIME,
242
+ status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
243
+ amount FLOAT, -- Amount paid by customer
244
+ cost FLOAT, -- Cost of materials
245
+
246
+ join_one users on user_id = users.id,
247
+
248
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
249
+
250
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
251
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
252
+ revenue - cogs as profit,
253
+ profit / revenue as profit_margin
254
+ )
255
+
256
+ table users (
257
+ id BIGINT primary_key,
258
+ name VARCHAR,
259
+ email VARCHAR,
260
+ age INTEGER,
261
+ country_code VARCHAR,
262
+
263
+ join_many orders on id = orders.user_id
264
+ )
265
+ ```
266
+
267
+ We can count the number of orders that were revenue-recognized vs. not:
268
+
269
+ ```sql
270
+ -- Number of revenue-recognized orders vs. not
271
+ select
272
+ revenue_recognized, -- Stored expression in orders
273
+ count(*)
274
+ from orders
275
+ group by 1
276
+ ```
277
+
278
+ This would be equivalent to:
279
+
280
+ ```sql
281
+ select
282
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
283
+ count(*)
284
+ from orders
285
+ group by 1
286
+ ```
287
+
288
+ You can see that invoking a stored expression is like using a macro: the definition for the stored expression is effectively expanded in-line by Graphene when it runs the query.
289
+
290
+ This is an important concept to understand when invoking stored expressions that are **aggregative** (ie. contain agg functions). Here's an example.
291
+
292
+ ```sql
293
+ -- Profit by month
294
+ select
295
+ date_trunc(created_at, month) as month,
296
+ profit
297
+ from orders
298
+ group by 1
299
+ order by 1 asc
300
+ ```
301
+
302
+ Note that, while `profit` looks like a column here, it is _not_ a column. That's because this query is equivalent to:
303
+
304
+ ```sql
305
+ select
306
+ date_trunc(created_at, month) as month,
307
+ sum(case when revenue_recognized then amount else 0 end) - sum(case when revenue_recognized then cost else 0 end) as profit -- Profit is defined as revenue - cogs, which respectively expands out to these two filtered sums
308
+ from orders
309
+ group by 1
310
+ order by 1 asc
311
+ ```
312
+
313
+ For this reason, you should never wrap an aggregative stored expression in `sum()`, `avg()`, or any other agg function in a query, just as you would never write `sum(sum(foo))` in SQL. That would throw an error!
314
+
315
+ #### Safe aggregation in fan-outs
316
+
317
+ A common and dangerous user error in regular SQL is aggregating data incorrectly after joining tables. This can happen when rows of one table match multiple rows of another, and effectively get duplicated for each match.
318
+
319
+ For example, after joining `users` to `orders`, your joined result will have some users repeated multiple times if they've made multiple purchases. If you wanted to find the average age of customers over this joined result, simply using an `avg(users.age)` would be _incorrect_, because you would be weighting the average towards users with multiple purchases, rather than taking the true average.
320
+
321
+ GSQL aims to solve this problem. With the additional information provided via `join_one` and `join_many`, Graphene knows in which scenarios row duplication occurs, and will rewrite aggregative expressions in a way that ignores the duplicate rows.
322
+
323
+ The query `select avg(users.age) from orders` will be rewritten to the following SQL when Graphene queries the underlying database (this is for BigQuery, specifically):
324
+
325
+ ```sql
326
+ SELECT
327
+ (CAST((
328
+ (
329
+ SUM(DISTINCT
330
+ (CAST(ROUND(COALESCE(users_0.`age`,0)*(1*1.0), 9) AS NUMERIC) +
331
+ (cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 1, 15)) as int64) as numeric) * 4294967296 + cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 16, 8)) as int64) as numeric)) * 0.000000001
332
+ ))
333
+ -
334
+ SUM(DISTINCT (cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 1, 15)) as int64) as numeric) * 4294967296 + cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 16, 8)) as int64) as numeric)) * 0.000000001)
335
+ )/(1*1.0)) AS FLOAT64))/NULLIF(COUNT(DISTINCT CASE WHEN users_0.`age` IS NOT NULL THEN users_0.`id` END),0) as `col_0`
336
+ FROM `bigquery-public-data.thelook_ecommerce.orders` as base
337
+ LEFT JOIN `bigquery-public-data.thelook_ecommerce.users` AS users_0
338
+ ON users_0.`id`=base.`user_id`
339
+ ```
340
+
341
+ You don't have to understand this; the point is that GSQL is minimizing the chances that naive users aggregate data incorrectly.
342
+
343
+ ### `table as` statements
344
+
345
+ You can turn the output of any `select` statement into a table with `table foo as (select ...)`. Here's an example of an additional table `user_facts` added to the two tables from earlier:
346
+
347
+ ```sql
348
+ table orders (
349
+ id BIGINT primary_key,
350
+ user_id BIGINT,
351
+ created_at DATETIME,
352
+ status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
353
+ amount FLOAT, -- Amount paid by customer
354
+ cost FLOAT, -- Cost of materials
355
+
356
+ join_one users on user_id = users.id,
357
+
358
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
359
+
360
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
361
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
362
+ revenue - cogs as profit,
363
+ profit / revenue as profit_margin
364
+ )
365
+
366
+ table users (
367
+ id BIGINT primary_key,
368
+ name VARCHAR,
369
+ email VARCHAR,
370
+ age INTEGER,
371
+
372
+ join_many orders on id = orders.user_id,
373
+ join_one user_facts on id = user_facts.id,
374
+
375
+ /* Scalar expressions */
376
+
377
+ user_facts.ltv as ltv,
378
+ user_facts.lifetime_orders as lifetime_orders
379
+ )
380
+
381
+ table user_facts as (
382
+ select
383
+ id,
384
+ orders.revenue as ltv,
385
+ count(orders.id) as lifetime_orders
386
+ from users
387
+ group by id
388
+ )
389
+ ```
390
+
391
+ `table as` statements are conceptually the same as view tables in regular SQL. A few things to note:
392
+ - You cannot yet declare join relationships or stored expressions directly in a `table as` statement. Other tables can declare join relationships to it, though, as shown above.
393
+ - In the example above, the `ltv` and `lifetime_orders` columns from `user_facts` are "hoisted" back into `users` so that they appear as if they are columns from `users`. This is simply a design choice which allows query writers to never need to know about `user_facts`.
394
+
395
+ ### Other miscellaneous details about GSQL
396
+
397
+ - Trailing commas in `table` statements are optional.
398
+ - Trailing semicolons after `table` and `table as` statements are optional.
399
+ - The clauses in a `select` statement (`select`, `from`, `join`, `group by`, etc.) can be written in any order. They cannot be repeated, however.
400
+ - `group by all` is implied if aggregative and scalar expressions are both present in the `select` clause. This means that `group by` can be omitted and the query will still effectively execute the `group by all`.
401
+ - Expressions in `group by` are implicitly selected, so `from orders select avg(amount) group by user_id` will return two columns.
79
402
  - `count` is a reserved word. Do not alias your columns as `count`.
403
+ - Window functions and set operations are not supported.
80
404
 
405
+ ## Graphene visualizations
81
406
 
82
- ## Graphene viz (.md)
83
- Graphene data apps are written in Markdown with components. Markdown files can contain named GSQL queries in code fences that components can then refer to. Those queries can use any tables defined in .gsql files.
407
+ Graphene data apps are written in Markdown with the addition of special Graphene HTML components. Markdown files can contain named GSQL queries in code fences that components can then refer to. Those queries can use any tables defined in .gsql files.
84
408
 
85
409
  ````markdown
86
410
  # Order analysis
@@ -96,9 +420,14 @@ Graphene data apps are written in Markdown with components. Markdown files can c
96
420
  </Row>
97
421
  ````
98
422
 
99
- Note that components can also directly refer to Graphene tables in their `data` property; it is not always necessary to prepare data in a code-fenced query. Properties that take column references can also take whole expressions, as shown in the second line chart from the example above.
423
+ Note that components can also directly refer to Graphene tables in their `data` property; it is not always necessary to prepare data in a code-fenced query. Properties that take column references can also take whole GSQL expressions, as shown in the second line chart from the example above.
424
+
425
+ Best practices:
426
+ - If you have multiple time series charts, align their x-axes to have the same range and granularity.
427
+ - Use the same color for a given metric if it is used in multiple charts.
100
428
 
101
429
  ### Components
430
+
102
431
  The following components are available:
103
432
  - [BarChart](./data_apps/components/charts/bar-chart.md)
104
433
  - [LineChart](./data_apps/components/charts/line-chart.md)
@@ -111,6 +440,7 @@ The following components are available:
111
440
  - [TextInput](./data_apps/components/inputs/text-input.md)
112
441
 
113
442
  ## Using the Graphene CLI
443
+
114
444
  These are the available commands:
115
445
  - `npm run graphene check` - Checks the syntax for the entire Graphene project.
116
446
  - `npm run graphene compile "<GSQL>"` - Shows how GSQL is translated into the underlying database SQL.
@@ -119,11 +449,14 @@ These are the available commands:
119
449
  - `npm run graphene view <mdPath>` - Captures a screenshot of a given .md file, along with any errors encountered.
120
450
 
121
451
  ## AGENT INSTRUCTIONS
452
+
122
453
  Follow these guidelines when working in a Graphene project.
123
- - Before writing any GSQL queries, run them in the CLI first to make sure that the results make sense.
124
- - Do not redefine joins or expressions in a GSQL query that already exist in a semantic model. For example, if profit has already been defined as the stored expression `sum(revenue - cost) as profit` on the table `orders`, you can simply use it in a downstream query as `select profit from orders`.
125
- - Because all joins in Graphene are left outer joins, be mindful about your `from` table selection.
126
- - When adding a component to a .md file, read the associated documentation page first in /docs/data_apps/components so you understand all the available configurations.
454
+ - When formulating GSQL queries:
455
+ - First check all available stored expressions to see if there are any you can use. DO NOT redefine important business definitions like `profit` if they've already been modeled!
456
+ - Run your GSQL queries in the CLI first, _before_ you write them to a file. This way you can reason about the results to make sure they make sense.
127
457
  - Do not try to search the web for Graphene-specific info; you will not find anything. All the documentation is in /docs.
128
- - If you write to a .gsql file, run a syntax check with `npm run graphene check`.
129
- - If you write to a .md file, run a syntax check with `npm run graphene check`. Once there are no syntax errors, do a visual check by running `npm run graphene view <mdPath>` and looking at the .png it generates.
458
+ - When writing to a .gsql file, check your code with `npm run graphene check`.
459
+ - When writing to a Graphene .md file:
460
+ - First read ALL the linked component docs listed in [Components](#components) above.
461
+ - Check your code with `npm run graphene check`.
462
+ - Once there are no syntax errors, do a visual check by running `npm run graphene view <mdPath>` and looking at the .png it generates.
@@ -6,11 +6,9 @@ let staticErrors: Error[] = []
6
6
  let errorProviders: Record<string, ErrorProvider> = {}
7
7
 
8
8
  window.addEventListener('error', (event) => {
9
- console.log('recordedError')
10
9
  staticErrors.push(event.error)
11
10
  })
12
11
  window.addEventListener('unhandledrejection', (event) => {
13
- console.log('record unhandled')
14
12
  staticErrors.push(event.reason)
15
13
  })
16
14
 
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "main": "cli.ts",
4
4
  "type": "module",
5
5
  "author": "Graphene Systems Inc",
6
- "version": "0.0.2",
6
+ "version": "0.0.4",
7
7
  "license": "Elastic-2.0",
8
8
  "engines": {
9
9
  "node": ">=16"
@@ -25,9 +25,9 @@
25
25
  "dependencies": {
26
26
  "@duckdb/node-api": "1.3.2-alpha.26",
27
27
  "@google-cloud/bigquery": "^8.1.1",
28
+ "@graphenedata/malloy": "0.0.304",
28
29
  "@lezer/common": "^1.2.3",
29
30
  "@lezer/lr": "^1.4.2",
30
- "@graphenedata/malloy": "0.0.304",
31
31
  "@sveltejs/vite-plugin-svelte": "3.1.2",
32
32
  "@tidyjs/tidy": "^2.5.2",
33
33
  "chalk": "^5.3.0",
@@ -36,7 +36,6 @@
36
36
  "cli-table3": "^0.6.3",
37
37
  "commander": "^11.0.0",
38
38
  "debounce": "^1.2.1",
39
- "dompurify": "^3.2.7",
40
39
  "echarts": "^5.5.0",
41
40
  "fs-extra": "11.2.0",
42
41
  "glob": "^11.0.3",
@@ -44,10 +43,7 @@
44
43
  "marked": "^16.3.0",
45
44
  "mdsvex": "^0.12.6",
46
45
  "nanoid": "3.3.8",
47
- "rehype-stringify": "^10.0.1",
48
- "remark": "^15.0.1",
49
- "remark-mdx": "^3.1.1",
50
- "remark-rehype": "^11.1.2",
46
+ "sanitize-html": "^2.17.0",
51
47
  "ssf": "^0.11.2",
52
48
  "svelte": "4.2.19",
53
49
  "unist-util-visit": "4.1.2",
@@ -57,6 +53,7 @@
57
53
  "devDependencies": {
58
54
  "@types/fs-extra": "^11.0.4",
59
55
  "@types/node": "^20.0.0",
56
+ "@types/sanitize-html": "^2.16.0",
60
57
  "@types/ws": "^8.18.1",
61
58
  "esbuild": "^0.21.5",
62
59
  "vitest": "3.0.5",
@@ -1,3 +0,0 @@
1
- <main>
2
- <slot />
3
- </main>