@graphenedata/cli 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/cli.js CHANGED
@@ -555,9 +555,9 @@ function analyzeQuery(queryNode) {
555
555
  isAgg ||= !!isSelectDistinct;
556
556
  selects.forEach((s) => {
557
557
  if (s.getChild("Wildcard")) {
558
- let path7 = s.getChild("Wildcard").getChildren("Identifier");
559
- let pathStrings = path7.map((p) => txt(p));
560
- let target = followJoins(path7, scope.table);
558
+ let path9 = s.getChild("Wildcard").getChildren("Identifier");
559
+ let pathStrings = path9.map((p) => txt(p));
560
+ let target = followJoins(path9, scope.table);
561
561
  if (!target) return;
562
562
  target.fields.forEach((f) => {
563
563
  if (isJoin(f) || f.isAgg) return;
@@ -655,8 +655,8 @@ function analyzeExpression(expr, scope) {
655
655
  if (scope.outputFields.includes(field) && field.isAgg) {
656
656
  return { node: "outputField", name: field.name, ...typeInfo, isAgg: field.isAgg };
657
657
  }
658
- let path7 = expr.getChildren("Identifier").map((i) => txt(i));
659
- return { node: "field", path: path7, ...typeInfo, isAgg: field.isAgg };
658
+ let path9 = expr.getChildren("Identifier").map((i) => txt(i));
659
+ return { node: "field", path: path9, ...typeInfo, isAgg: field.isAgg };
660
660
  }
661
661
  case "ExtractExpression": {
662
662
  let e = analyzeExpression(expr.getChild("Expression"), scope);
@@ -1215,11 +1215,11 @@ async function loadWorkspace(dir, includeMd) {
1215
1215
  updateFile(contents, file);
1216
1216
  }
1217
1217
  }
1218
- function updateFile(contents, path7) {
1219
- FILE_MAP[path7] ||= { path: path7, contents, tree: null, tables: [], queries: [] };
1220
- FILE_MAP[path7].contents = contents;
1221
- FILE_MAP[path7].tree = null;
1222
- return FILE_MAP[path7];
1218
+ function updateFile(contents, path9) {
1219
+ FILE_MAP[path9] ||= { path: path9, contents, tree: null, tables: [], queries: [] };
1220
+ FILE_MAP[path9].contents = contents;
1221
+ FILE_MAP[path9].tree = null;
1222
+ return FILE_MAP[path9];
1223
1223
  }
1224
1224
  function analyze(contents, type) {
1225
1225
  clearDiagnostics();
@@ -1286,6 +1286,8 @@ var bigQuery_exports = {};
1286
1286
  __export(bigQuery_exports, {
1287
1287
  BigQueryConnection: () => BigQueryConnection
1288
1288
  });
1289
+ import fs3 from "fs";
1290
+ import path4 from "path";
1289
1291
  import { BigQuery, BigQueryDate, BigQueryTimestamp } from "@google-cloud/bigquery";
1290
1292
  var BigQueryConnection;
1291
1293
  var init_bigQuery = __esm({
@@ -1294,6 +1296,13 @@ var init_bigQuery = __esm({
1294
1296
  BigQueryConnection = class {
1295
1297
  client;
1296
1298
  constructor(options = {}) {
1299
+ if (process.env.GOOGLE_CREDENTIALS_CONTENT) {
1300
+ let parsed = JSON.parse(process.env.GOOGLE_CREDENTIALS_CONTENT);
1301
+ let credPath = path4.resolve("./bq.json");
1302
+ fs3.writeFileSync("./bq.json", process.env.GOOGLE_CREDENTIALS_CONTENT.replace(" ", "\n "));
1303
+ process.env.GOOGLE_APPLICATION_CREDENTIALS = credPath;
1304
+ options.projectId = parsed.project_id;
1305
+ }
1297
1306
  options.projectId ||= config.googleProjectId;
1298
1307
  options.maxRetries ||= 3;
1299
1308
  options.userAgent ||= "Graphene";
@@ -1322,8 +1331,8 @@ var duckdb_exports = {};
1322
1331
  __export(duckdb_exports, {
1323
1332
  DuckDBConnection: () => DuckDBConnection
1324
1333
  });
1325
- import { promises as fs3 } from "fs";
1326
- import path4 from "path";
1334
+ import { promises as fs4 } from "fs";
1335
+ import path5 from "path";
1327
1336
  import { DuckDBTimestampValue, DuckDBInstance, DuckDBDateValue } from "@duckdb/node-api";
1328
1337
  var DuckDBConnection;
1329
1338
  var init_duckdb = __esm({
@@ -1336,10 +1345,10 @@ var init_duckdb = __esm({
1336
1345
  this.ready = this.initialize();
1337
1346
  }
1338
1347
  async initialize() {
1339
- let files = await fs3.readdir(config.root);
1348
+ let files = await fs4.readdir(config.root);
1340
1349
  let databasePath = files.find((f) => f.endsWith(".duckdb"));
1341
1350
  if (!databasePath) throw new Error("No .duckdb file found in current directory");
1342
- databasePath = path4.resolve(config.root, databasePath);
1351
+ databasePath = path5.resolve(config.root, databasePath);
1343
1352
  let db = await DuckDBInstance.create(":memory:");
1344
1353
  this.connection = await db.connect();
1345
1354
  let escapedPath = databasePath.replace(/'/g, "''");
@@ -1385,6 +1394,96 @@ var init_connections = __esm({
1385
1394
  }
1386
1395
  });
1387
1396
 
1397
+ // mdCompile.ts
1398
+ import fs5 from "fs";
1399
+ import path6 from "path";
1400
+ import { visit } from "unist-util-visit";
1401
+ import sanitizeHtml from "sanitize-html";
1402
+ function extractQueries() {
1403
+ function escapeHtml(str) {
1404
+ return str.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
1405
+ }
1406
+ return function transformer(tree) {
1407
+ visit(tree, "code", (node, index, parent) => {
1408
+ if (index === null) return;
1409
+ let name = typeof node.meta === "string" ? node.meta : "";
1410
+ let code = typeof node.value === "string" ? node.value.trim() : "";
1411
+ parent.children[index] = { type: "html", value: `<GrapheneQuery name="${escapeHtml(name)}" code="${escapeHtml(code)}" />` };
1412
+ });
1413
+ };
1414
+ }
1415
+ function escapeAngles() {
1416
+ return function transformer(tree) {
1417
+ visit(tree, "text", (node) => {
1418
+ if (!node.value || typeof node.value !== "string") return;
1419
+ if (!node.value.includes("<")) return;
1420
+ node.value = node.value.replace(/</g, "&lt;");
1421
+ });
1422
+ };
1423
+ }
1424
+ function sanitizeMarkdown() {
1425
+ return function transformer(tree) {
1426
+ visit(tree, "raw", (node) => {
1427
+ if (typeof node.value !== "string") return;
1428
+ let expanded = node.value.replace(/<(\w+)((?:\s[^<>]*?)?)\s*\/>/gi, (_, name, attrs = "") => {
1429
+ let spacing = attrs;
1430
+ return `<${name}${spacing}></${name}>`;
1431
+ });
1432
+ let sanitized = sanitizeHtml(expanded, {
1433
+ ...sanitizeHtml.defaults,
1434
+ allowedTags: [
1435
+ ...sanitizeHtml.defaults.allowedTags,
1436
+ ...componentNames()
1437
+ ],
1438
+ allowedAttributes: {
1439
+ ...sanitizeHtml.defaults.allowedAttributes,
1440
+ ...Object.fromEntries(componentNames().map((n) => [n, ["*"]]))
1441
+ },
1442
+ parser: {
1443
+ ...sanitizeHtml.defaults.parser || {},
1444
+ lowerCaseAttributeNames: false,
1445
+ lowerCaseTags: false
1446
+ }
1447
+ });
1448
+ node.value = sanitized;
1449
+ });
1450
+ };
1451
+ }
1452
+ function injectComponentImports() {
1453
+ let imp = `const {${componentNames().join(", ")}} = window.$GRAPHENE.components`;
1454
+ return {
1455
+ markup: ({ content, filename }) => {
1456
+ if (!filename.endsWith(".md")) return;
1457
+ if (content.includes("<script>")) {
1458
+ content = content.replace("<script>", `<script>
1459
+ ${imp}`);
1460
+ } else {
1461
+ content = `<script>
1462
+ ${imp}
1463
+ </script>
1464
+ ${content}`;
1465
+ }
1466
+ return { code: content };
1467
+ },
1468
+ style: () => {
1469
+ },
1470
+ script: () => {
1471
+ }
1472
+ };
1473
+ }
1474
+ function componentNames() {
1475
+ if (cachedComponentNames) return cachedComponentNames;
1476
+ let files = fs5.readdirSync(path6.join(import.meta.dirname, "../ui/components"));
1477
+ cachedComponentNames = files.map((f) => path6.basename(f, ".svelte")).filter((f) => !f.startsWith("_"));
1478
+ return cachedComponentNames || [];
1479
+ }
1480
+ var cachedComponentNames;
1481
+ var init_mdCompile = __esm({
1482
+ "mdCompile.ts"() {
1483
+ cachedComponentNames = null;
1484
+ }
1485
+ });
1486
+
1388
1487
  // serve2.ts
1389
1488
  var serve2_exports = {};
1390
1489
  __export(serve2_exports, {
@@ -1393,19 +1492,18 @@ __export(serve2_exports, {
1393
1492
  });
1394
1493
  import { createServer, optimizeDeps } from "vite";
1395
1494
  import { svelte, vitePreprocess } from "@sveltejs/vite-plugin-svelte";
1396
- import { visit } from "unist-util-visit";
1397
- import fs4 from "fs-extra";
1495
+ import fs6 from "fs-extra";
1398
1496
  import crypto from "crypto";
1399
1497
  import { mdsvex } from "mdsvex";
1400
- import path5 from "path";
1498
+ import path7 from "path";
1401
1499
  import { fileURLToPath as fileURLToPath2 } from "url";
1402
1500
  import { WebSocketServer } from "ws";
1403
1501
  import { spawn as spawn2 } from "child_process";
1404
1502
  async function serve2() {
1405
1503
  grapheneRoot = config.root;
1406
- uiRoot = path5.join(fileURLToPath2(import.meta.url), "../../ui");
1407
- await fs4.ensureDir(path5.resolve(grapheneRoot, "node_modules/.graphene"));
1408
- await fs4.writeFile(path5.resolve(grapheneRoot, `node_modules/.graphene/${process.env.NODE_ENV == "test" ? "test" : "serve"}.pid`), String(process.pid));
1504
+ uiRoot = path7.join(fileURLToPath2(import.meta.url), "../../ui");
1505
+ await fs6.ensureDir(path7.resolve(grapheneRoot, "node_modules/.graphene"));
1506
+ await fs6.writeFile(path7.resolve(grapheneRoot, `node_modules/.graphene/${process.env.NODE_ENV == "test" ? "test" : "serve"}.pid`), String(process.pid));
1409
1507
  let server = await createServer({
1410
1508
  root: config.root,
1411
1509
  plugins: [
@@ -1415,8 +1513,8 @@ async function serve2() {
1415
1513
  vitePreprocess(),
1416
1514
  mdsvex({
1417
1515
  extensions: [".md"],
1418
- remarkPlugins: [extractQueries],
1419
- layout: path5.resolve(uiRoot, "layout.svelte")
1516
+ remarkPlugins: [extractQueries, escapeAngles],
1517
+ rehypePlugins: [sanitizeMarkdown]
1420
1518
  }),
1421
1519
  injectComponentImports()
1422
1520
  ]
@@ -1432,7 +1530,7 @@ async function serve2() {
1432
1530
  },
1433
1531
  resolve: {
1434
1532
  alias: {
1435
- graphene: path5.resolve(uiRoot, "web.js")
1533
+ graphene: path7.resolve(uiRoot, "web.js")
1436
1534
  }
1437
1535
  }
1438
1536
  });
@@ -1497,7 +1595,7 @@ async function handlePage(server, res, filePath, mount) {
1497
1595
  res.setHeader("Content-Type", "text/html");
1498
1596
  let mdMount = mount ? `
1499
1597
  import Page from ${JSON.stringify(filePath)};
1500
- new Page({ target: document.getElementById('app'), props: {} })
1598
+ new Page({ target: document.getElementById('content'), props: {} })
1501
1599
  ` : "";
1502
1600
  let html = await server.transformIndexHtml(filePath, `<!doctype html>
1503
1601
  <html lang="en">
@@ -1511,7 +1609,9 @@ async function handlePage(server, res, filePath, mount) {
1511
1609
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@100..900&display=swap" rel="stylesheet">
1512
1610
  </head>
1513
1611
  <body>
1514
- <div id="app"></div>
1612
+ <main>
1613
+ <div id="content"></div>
1614
+ </main>
1515
1615
  <script type="module">
1516
1616
  // do this first so we can track errors caused by importing the md file
1517
1617
  import 'graphene'
@@ -1523,34 +1623,6 @@ async function handlePage(server, res, filePath, mount) {
1523
1623
  </html>`);
1524
1624
  return res.end(html);
1525
1625
  }
1526
- function extractQueries() {
1527
- function escapeHtml(str) {
1528
- return str.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
1529
- }
1530
- return function transformer(tree) {
1531
- visit(tree, "code", (node, index, parent) => {
1532
- if (index === null) return;
1533
- parent.children[index] = { type: "html", value: `<GrapheneQuery name="${escapeHtml(node.meta)}" code="${escapeHtml(node.value.trim())}" />` };
1534
- });
1535
- };
1536
- }
1537
- function injectComponentImports() {
1538
- let files = fs4.readdirSync(path5.join(uiRoot, "components"));
1539
- let componentNames = files.map((f) => path5.basename(f, ".svelte")).filter((f) => !f.startsWith("_"));
1540
- let imp = `const {${componentNames.join(", ")}} = window.$GRAPHENE.components`;
1541
- return {
1542
- markup: ({ content, filename }) => {
1543
- if (!filename.endsWith(".md")) return;
1544
- content = content.replace("<script>", `<script>
1545
- ${imp}`);
1546
- return { code: content };
1547
- },
1548
- style: () => {
1549
- },
1550
- script: () => {
1551
- }
1552
- };
1553
- }
1554
1626
  function mockFilesForTests() {
1555
1627
  if (process.env.NODE_ENV !== "test") return null;
1556
1628
  return {
@@ -1570,6 +1642,7 @@ var init_serve2 = __esm({
1570
1642
  "serve2.ts"() {
1571
1643
  init_core();
1572
1644
  init_connections();
1645
+ init_mdCompile();
1573
1646
  updateWorkspacePlugin = {
1574
1647
  name: "updateWorkspace",
1575
1648
  configureServer: (s) => {
@@ -1611,8 +1684,8 @@ var init_serve2 = __esm({
1611
1684
  if (pathName == "/graphene/view") return await handleView(req, res);
1612
1685
  if (pathName == "/__ct") return await handlePage(s, res, "__ct", false);
1613
1686
  if (!pathName || pathName == "/") pathName = "index";
1614
- let mdPath = path5.join(grapheneRoot, pathName + ".md");
1615
- if (await fs4.exists(mdPath)) {
1687
+ let mdPath = path7.join(grapheneRoot, pathName + ".md");
1688
+ if (await fs6.exists(mdPath)) {
1616
1689
  await handlePage(s, res, mdPath, true);
1617
1690
  } else {
1618
1691
  next();
@@ -1694,8 +1767,8 @@ function printTable(rows) {
1694
1767
  // cli.ts
1695
1768
  init_core();
1696
1769
  init_config();
1697
- import fs5 from "fs-extra";
1698
- import path6 from "path";
1770
+ import fs7 from "fs-extra";
1771
+ import path8 from "path";
1699
1772
  import os from "os";
1700
1773
 
1701
1774
  // background.ts
@@ -1848,9 +1921,9 @@ program.command("view").description("Capture a screenshot of a rendered markdown
1848
1921
  }
1849
1922
  if (result.screenshot) {
1850
1923
  let filename = `graphene-screenshot-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}.png`;
1851
- let screenshotPath = path6.join(os.tmpdir(), filename);
1924
+ let screenshotPath = path8.join(os.tmpdir(), filename);
1852
1925
  let base64Data = result.screenshot.replace(/^data:image\/png;base64,/, "");
1853
- await fs5.writeFile(screenshotPath, base64Data, "base64");
1926
+ await fs7.writeFile(screenshotPath, base64Data, "base64");
1854
1927
  console.log("Screenshot saved to", screenshotPath);
1855
1928
  }
1856
1929
  });
@@ -1865,9 +1938,9 @@ async function readInput(arg) {
1865
1938
  process.stdin.resume();
1866
1939
  });
1867
1940
  }
1868
- let absolutePath = path6.resolve(arg);
1869
- if (fs5.existsSync(absolutePath)) {
1870
- return await fs5.promises.readFile(absolutePath, "utf-8");
1941
+ let absolutePath = path8.resolve(arg);
1942
+ if (fs7.existsSync(absolutePath)) {
1943
+ return await fs7.promises.readFile(absolutePath, "utf-8");
1871
1944
  }
1872
1945
  return arg;
1873
1946
  }
@@ -1,86 +1,410 @@
1
1
  # How to develop in Graphene
2
2
 
3
- Graphene is a framework for building semantic layers and data visualizations in code. Graphene projects are comprised of:
3
+ Graphene is a framework for data analysis, semantic modeling, and data visualization in code. Graphene projects are comprised of:
4
4
  - .gsql files that define semantics-enriched tables (aka semantic models)
5
- - .md files that define data apps (dashboards)
5
+ - .md files that define data apps (aka dashboards)
6
6
 
7
7
  Graphene also has a CLI that lets you check syntax, run queries, serve data apps, and more.
8
8
 
9
9
  ## Graphene SQL (GSQL)
10
10
 
11
- ### Tables
12
- Tables have to be declared first before they can be queried. A table in Graphene has the added concept of _semantics_. Semantics are stored expressions and join relationships associated with a table that `select` queries can leverage. This allows query logic to be centralized, reusable, and more easily governed.
11
+ GSQL is comprised of `table` statements that declare tables and `select` statements that query them.
13
12
 
14
- Here's an example:
13
+ ### `table` statements
15
14
 
16
- ```gsql
15
+ `table` statements manifest tables that already exist in your database. Here's an example of two tables, `orders` and `users`, in GSQL.
16
+
17
+ ```sql
17
18
  table orders (
19
+
20
+ /* Base columns */
21
+
18
22
  id BIGINT primary_key,
19
23
  user_id BIGINT,
20
24
  created_at DATETIME,
21
- amount FLOAT, -- paid by customer #units=usd
22
- cost FLOAT, -- cost of materials #units=usd
25
+ status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
26
+ amount FLOAT, -- Amount paid by customer
27
+ cost FLOAT, -- Cost of materials
28
+
29
+ /* Join relationships */
23
30
 
24
31
  join_one users on user_id = users.id,
25
32
 
26
- sum(amount) as revenue,
27
- sum(amount - cost) as profit,
33
+ /* Scalar expressions */
34
+
35
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
36
+
37
+ /* Agg expressions */
38
+
39
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
40
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
41
+ revenue - cogs as profit,
28
42
  profit / revenue as profit_margin
29
- );
43
+ )
30
44
 
31
45
  table users (
32
46
  id BIGINT primary_key,
33
47
  name VARCHAR,
34
48
  email VARCHAR,
35
49
  age INTEGER,
50
+ country_code VARCHAR,
36
51
 
37
52
  join_many orders on id = orders.user_id
38
- );
53
+ )
39
54
  ```
40
55
 
41
- Syntax notes
42
- - `table foo (...)` defines a Graphene table based on the database table `foo`.
43
- - The allowed join types are `join_one` and `join_many`. All joins are left outer joins. There is no inner, right, or cross join.
56
+ We can break down a table statement into three parts: [base columns](#base-columns-required), [join relationships](#join-relationships), and [stored expressions](#stored-expressions) (aka dimensions and measures).
57
+
58
+ #### Base columns (required)
59
+
60
+ The base column set is simply a reflection of the underlying database table's schema. Similar to `create table` statements in regular SQL DDL, you list each column's name and data type. One column must be designated as the primary key.
61
+
62
+ #### Join relationships
63
+
64
+ Join relationships in a `table` statement declare joins that can be used when querying them. This makes query writing easier and more foolproof. See [Using join relationships in queries](#using-join-relationships-in-queries) below for how to use modeled joins in queries.
65
+
66
+ The other main difference between joins in GSQL and regular SQL is that you have to specify whether there are many rows in the left table for each row in the right table, or vice versa. This additional bit of information allows Graphene to prevent incorrect aggregation as a result of row duplication (aka fan-out) through joins. See [Safe aggregation in fan-outs](#safe-aggregation-in-fan-outs) for more details.
67
+
68
+ This information is provided with the two supported join types, `join_one` and `join_many`:
44
69
  - `join_one` is used if there are many rows in the **left** table for each row in the **right** table.
45
70
  - `join_many` is used if there are many rows in the **right** table for each row in the **left** table.
46
- - Join names within a table must be unique. Polymorphic relationships (eg., where there are multiple relationships between the same two tables on different keys) are allowed but must be aliased eg. `join_one users as owner on user_id = owner.id` and `join_one users as viewer on user_id = viewer.id`.
47
- - Comments in tables can provide descriptions as well as metadata (denoted by `#` inside the comment).
48
71
 
49
- Best practices
50
- - For a given table, only model joins that are directly on that table. Graphene will automatically traverse multi-hop joins when it compiles the collective table space.
51
- - A join between two tables should be modeled in both the respective `table` statements. This may seem redundant but it offers more flexibility for queries to choose which table to set in the `from` (remember that direction matters since all joins are left joins).
72
+ In the example above with `orders` and `users`, the joins confirm that there are many orders per user, and only one user per order.
73
+
74
+ Note that all joins in GSQL are left outer joins. There is no inner, right, or cross join.
75
+
76
+ ##### Multiple join relationships between the same two tables
52
77
 
53
- ### Queries
54
- Graphene tables can be queried using `select` statements. Here are some example queries on the tables above:
78
+ Sometimes there are multiple valid ways to join two tables together. You can model this in Graphene by aliasing the various joins with `as`, just as you would in normal SQL. For example:
55
79
 
80
+ ```sql
81
+ table projects (
82
+ ...
83
+ owner_id BIGINT,
84
+ viewer_id BIGINT,
85
+
86
+ join_one users as project_owner on owner_id = project_owner.id,
87
+ join_one users as project_viewer on viewer_id = project_viewer.id
88
+ )
89
+
90
+ table users (
91
+ ...
92
+ id BIGINT,
93
+
94
+ join_many projects as projects_as_owner on id = projects_as_owner.owner_id,
95
+ join_many projects as projects_as_viewer on id = projects_as_viewer.viewer_id
96
+ )
56
97
  ```
57
- -- top 10 customers by profit
58
- from orders select
59
- users.name, -- notice how we can access the joined table without a join here
60
- profit -- this expands into the stored expression defined in the table
98
+
99
+ ##### Best practices for modeling join relationships
100
+
101
+ - For a given `table` statement, only model joins that are directly on that table. Multi-hop join paths do not need to be written explicitly in order for queries to traverse them.
102
+ - A join between two tables should be modeled in both the respective `table` statements. This may seem redundant but it offers more flexibility for queries to choose which table to set in the `from` (remember that direction matters in queries since all joins are left joins).
103
+
104
+ #### Stored expressions
105
+
106
+ **Stored expressions** are GSQL expressions (ie. any arbitrary combination of functions, operators, and column references) that you want to make reusable to queries. Stored expressions are great for canonizing metrics, segments, and other important business definitions.
107
+
108
+ A stored expression must be given a name via `as`. It can then be referenced by name in queries that use the parent table. See [Using stored expressions in queries](#using-stored-expressions-in-queries) below for how to use stored expressions in queries.
109
+
110
+ Like expressions in regular SQL, expressions in GSQL are either scalar or aggregative. In BI parlance, these would be called dimensions and measures, respectively.
111
+
112
+ Expressions can refer to other expressions, as shown below.
113
+
114
+ ```sql
115
+ table orders (
116
+ ...
117
+
118
+ /* Scalar expressions */
119
+
120
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
121
+
122
+ /* Agg expressions */
123
+
124
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
125
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
126
+ revenue - cogs as profit, -- even though there are no agg functions here, this is still aggregative as it references other aggregative expressions
127
+ profit / revenue as profit_margin
128
+ )
129
+ ```
130
+
131
+
132
+ ### `select` statements
133
+
134
+ `select` is how you write queries in Graphene SQL. It behaves similarly to regular SQL except in the following ways:
135
+ - It can invoke join relationships and stored expressions from `table` statements.
136
+ - It prevents users from accidentally aggregating incorrectly through joins.
137
+
138
+ These differences are described in the sections below.
139
+
140
+ #### Using join relationships in queries
141
+
142
+ If a `table` has join relationships declared in it, a `select` query on that table can leverage those joins without needing to write its own join clauses. This is helpful for query writers who have not memorized all the correct join keys.
143
+
144
+ If you recall the model from before:
145
+
146
+ ```sql
147
+ table orders (
148
+ id BIGINT primary_key,
149
+ user_id BIGINT,
150
+ created_at DATETIME,
151
+ status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
152
+ amount FLOAT, -- Amount paid by customer
153
+ cost FLOAT, -- Cost of materials
154
+
155
+ join_one users on user_id = users.id,
156
+
157
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
158
+
159
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
160
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
161
+ revenue - cogs as profit,
162
+ profit / revenue as profit_margin
163
+ )
164
+
165
+ table users (
166
+ id BIGINT primary_key,
167
+ name VARCHAR,
168
+ email VARCHAR,
169
+ age INTEGER,
170
+ country_code VARCHAR,
171
+
172
+ join_many orders on id = orders.user_id
173
+ )
174
+ ```
175
+
176
+ We can write a query that leverages the modeled join relationship between `orders` and `users`:
177
+
178
+
179
+ ```sql
180
+ -- Top 10 customers by order count
181
+ select
182
+ users.name, -- Use the dot operator to traverse the modeled join relationship
183
+ count(*)
184
+ from orders -- A join statement here is not needed
185
+ group by 1
61
186
  order by 2 desc
62
187
  limit 10
63
188
  ```
64
189
 
190
+ ##### Multi-hop joins
191
+
192
+ Sometimes you need to access columns or stored expressions in a table that is two or more joins away from the `from` table. To do this, simply use more dot operators to trace the desired join path. For example, say there is another table added to our project, `countries`:
193
+
194
+ ```sql
195
+ table orders (
196
+ ...
197
+
198
+ join_one users on user_id = users.id
199
+ )
200
+
201
+ table users (
202
+ ...
203
+
204
+ join_many orders on id = orders.user_id,
205
+ join_one countries on country_code = countries.code
206
+ )
207
+
208
+ table countries (
209
+ code VARCHAR primary_key,
210
+ name VARCHAR,
211
+ currency VARCHAR,
212
+ free_shipping BOOLEAN,
213
+
214
+ join_many users on code = users.country_code
215
+ )
65
216
  ```
66
- -- average age of customers over time
217
+
218
+ We can write the following query to show the top ten countries by order count:
219
+
220
+ ```sql
221
+ -- Top 10 countries by order count
67
222
  select
68
- month(date),
69
- average(users.age), -- in normal SQL this would fan-out in the join; in Graphene it smartly de-duplicates the fan-out when computing aggregates
223
+ users.countries.name, -- Orders -> Users -> Countries
224
+ count(*)
70
225
  from orders
226
+ group by 1
227
+ order by 2 desc
228
+ limit 10
71
229
  ```
72
230
 
73
- Syntax notes
74
- - Columns and stored expressions from joined tables can be accessed with the dot operator, eg. `users.age` in the example above. Multiple join hops can be traversed with multiple dots, eg. `users.countries.country_code`.
75
- - `join_one` and `join_many` work here, too. This is useful if the join you need has not been modeled already.
76
- - The `from`, `select`, `group by`, and `where` clauses can be written in any order.
77
- - Expressions in `group by` are implicitly selected, so `from orders select avg(amount) group by user_id` is valid.
78
- - `group by all` is implied if aggregate and scalar expressions are both present in the `select`. It can be omitted and the query will still effectively execute the `group by all`.
231
+ #### Using stored expressions in queries
232
+
233
+ A stored expression can be invoked in a query by simply referencing it by name.
234
+
235
+ Again, using the model from before:
236
+
237
+ ```sql
238
+ table orders (
239
+ id BIGINT primary_key,
240
+ user_id BIGINT,
241
+ created_at DATETIME,
242
+ status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
243
+ amount FLOAT, -- Amount paid by customer
244
+ cost FLOAT, -- Cost of materials
245
+
246
+ join_one users on user_id = users.id,
247
+
248
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
249
+
250
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
251
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
252
+ revenue - cogs as profit,
253
+ profit / revenue as profit_margin
254
+ )
255
+
256
+ table users (
257
+ id BIGINT primary_key,
258
+ name VARCHAR,
259
+ email VARCHAR,
260
+ age INTEGER,
261
+ country_code VARCHAR,
262
+
263
+ join_many orders on id = orders.user_id
264
+ )
265
+ ```
266
+
267
+ We can count the number of orders that were revenue-recognized vs. not:
268
+
269
+ ```sql
270
+ -- Number of revenue-recognized orders vs. not
271
+ select
272
+ revenue_recognized, -- Stored expression in orders
273
+ count(*)
274
+ from orders
275
+ group by 1
276
+ ```
277
+
278
+ This would be equivalent to:
279
+
280
+ ```sql
281
+ select
282
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
283
+ count(*)
284
+ from orders
285
+ group by 1
286
+ ```
287
+
288
+ You can see that invoking a stored expression is like using a macro: the definition for the stored expression is effectively expanded in-line by Graphene when it runs the query.
289
+
290
+ This is an important concept to understand when invoking stored expressions that are **aggregative** (ie. contain agg functions). Here's an example.
291
+
292
+ ```sql
293
+ -- Profit by month
294
+ select
295
+ date_trunc(created_at, month) as month,
296
+ profit
297
+ from orders
298
+ group by 1
299
+ order by 1 asc
300
+ ```
301
+
302
+ Note that, while `profit` looks like a column here, it is _not_ a column. That's because this query is equivalent to:
303
+
304
+ ```sql
305
+ select
306
+ date_trunc(created_at, month) as month,
307
+ sum(case when revenue_recognized then amount else 0 end) - sum(case when revenue_recognized then cost else 0 end) as profit -- Profit is defined as revenue - cogs, which respectively expands out to these two filtered sums
308
+ from orders
309
+ group by 1
310
+ order by 1 asc
311
+ ```
312
+
313
+ For this reason, you would never wrap an aggregative stored expression in a `sum()`, `avg()`, or any other agg function in a query, just as you would never write `sum(sum(foo))` in SQL. That would throw an error!
314
+
315
+ #### Safe aggregation in fan-outs
316
+
317
+ A common and dangerous user error in regular SQL is aggregating data incorrectly after joining tables. This can happen when rows of one table match multiple rows of another, and effectively get duplicated for each match.
318
+
319
+ For example, after joining `users` to `orders`, your joined result will have some users repeated multiple times if they've made multiple purchases. If you wanted to find the average age of customers over this joined result, simply using an `avg(users.age)` would be _incorrect_, because you would be weighting the average towards users with multiple purchases, rather than taking the true average.
320
+
321
+ GSQL aims to solve this problem. With the additional information provided via `join_one` and `join_many`, Graphene knows in which scenarios row duplication occurs, and will rewrite aggregative expressions in a way that ignores the duplicate rows.
322
+
323
+ The query `select avg(users.age) from orders` will be rewritten to the following SQL when Graphene queries the underlying database (this is for BigQuery, specifically):
324
+
325
+ ```sql
326
+ SELECT
327
+ (CAST((
328
+ (
329
+ SUM(DISTINCT
330
+ (CAST(ROUND(COALESCE(users_0.`age`,0)*(1*1.0), 9) AS NUMERIC) +
331
+ (cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 1, 15)) as int64) as numeric) * 4294967296 + cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 16, 8)) as int64) as numeric)) * 0.000000001
332
+ ))
333
+ -
334
+ SUM(DISTINCT (cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 1, 15)) as int64) as numeric) * 4294967296 + cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 16, 8)) as int64) as numeric)) * 0.000000001)
335
+ )/(1*1.0)) AS FLOAT64))/NULLIF(COUNT(DISTINCT CASE WHEN users_0.`age` IS NOT NULL THEN users_0.`id` END),0) as `col_0`
336
+ FROM `bigquery-public-data.thelook_ecommerce.orders` as base
337
+ LEFT JOIN `bigquery-public-data.thelook_ecommerce.users` AS users_0
338
+ ON users_0.`id`=base.`user_id`
339
+ ```
340
+
341
+ You don't have to understand this; the point is that GSQL is minimizing the chances that naive users aggregate data incorrectly.
342
+
343
+ ### `table as` statements
344
+
345
+ You can turn the output of any `select` statement into a table with `table foo as (select ...)`. Here's an example of an additional table `user_facts` added to the two tables from earlier:
346
+
347
+ ```sql
348
+ table orders (
349
+ id BIGINT primary_key,
350
+ user_id BIGINT,
351
+ created_at DATETIME,
352
+ status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
353
+ amount FLOAT, -- Amount paid by customer
354
+ cost FLOAT, -- Cost of materials
355
+
356
+ join_one users on user_id = users.id,
357
+
358
+ status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
359
+
360
+ sum(case when revenue_recognized then amount else 0 end) as revenue,
361
+ sum(case when revenue_recognized then cost else 0 end) as cogs,
362
+ revenue - cogs as profit,
363
+ profit / revenue as profit_margin
364
+ )
365
+
366
+ table users (
367
+ id BIGINT primary_key,
368
+ name VARCHAR,
369
+ email VARCHAR,
370
+ age INTEGER,
371
+
372
+ join_many orders on id = orders.user_id,
373
+ join_one user_facts on id = user_facts.id,
374
+
375
+ /* Scalar expressions */
376
+
377
+ user_facts.ltv as ltv,
378
+ user_facts.lifetime_orders as lifetime_orders
379
+ )
380
+
381
+ table user_facts as (
382
+ select
383
+ id,
384
+ orders.revenue as ltv,
385
+ count(orders.id) as lifetime_orders,
386
+ from users
387
+ group by id
388
+ )
389
+ ```
390
+
391
+ `table as` statements are conceptually the same as views in regular SQL. A few things to note:
392
+ - You cannot yet declare join relationships or stored expressions directly in a `table as` statement. Other tables can declare join relationships to it, though, as shown above.
393
+ - In the example above, the `ltv` and `lifetime_orders` columns from `user_facts` are "hoisted" back into `users` so that they appear as if they are columns from `users`. This is simply a design choice which allows query writers to never need to know about `user_facts`.
394
+
395
+ ### Other miscellaneous details about GSQL
396
+
397
+ - Trailing commas in `table` statements are optional.
398
+ - Trailing semicolons after `table` and `table as` statements are optional.
399
+ - The clauses in a `select` statement (`select`, `from`, `join`, `group by`, etc.) can be written in any order. They cannot be repeated, however.
400
+ - `group by all` is implied if aggregative and scalar expressions are both present in the `select` clause. This means that `group by` can be omitted and the query will still effectively execute the `group by all`.
401
+ - Expressions in `group by` are implicitly selected, so `from orders select avg(amount) group by user_id` will return two columns.
79
402
  - `count` is a reserved word. Do not alias your columns as `count`.
403
+ - Window functions and set operations are not supported.
80
404
 
405
+ ## Graphene visualizations
81
406
 
82
- ## Graphene viz (.md)
83
- Graphene data apps are written in Markdown with components. Markdown files can contain named GSQL queries in code fences that components can then refer to. Those queries can use any tables defined in .gsql files.
407
+ Graphene data apps are written in Markdown with the addition of special Graphene HTML components. Markdown files can contain named GSQL queries in code fences that components can then refer to. Those queries can use any tables defined in .gsql files.
84
408
 
85
409
  ````markdown
86
410
  # Order analysis
@@ -96,9 +420,14 @@ Graphene data apps are written in Markdown with components. Markdown files can c
96
420
  </Row>
97
421
  ````
98
422
 
99
- Note that components can also directly refer to Graphene tables in their `data` property; it is not always necessary to prepare data in a code-fenced query. Properties that take column references can also take whole expressions, as shown in the second line chart from the example above.
423
+ Note that components can also directly refer to Graphene tables in their `data` property; it is not always necessary to prepare data in a code-fenced query. Properties that take column references can also take whole GSQL expressions, as shown in the second line chart from the example above.
424
+
425
+ Best practices
426
+ - If you have multiple time series charts, align their x-axes to have the same range and granularity.
427
+ - Use the same color for a given metric if it is used in multiple charts.
100
428
 
101
429
  ### Components
430
+
102
431
  The following components are available:
103
432
  - [BarChart](./data_apps/components/charts/bar-chart.md)
104
433
  - [LineChart](./data_apps/components/charts/line-chart.md)
@@ -111,6 +440,7 @@ The following components are available:
111
440
  - [TextInput](./data_apps/components/inputs/text-input.md)
112
441
 
113
442
  ## Using the Graphene CLI
443
+
114
444
  These are the available commands:
115
445
  - `npm run graphene check` - Checks the syntax for the entire Graphene project.
116
446
  - `npm run graphene compile "<GSQL>"` - Shows how GSQL is translated into the underlying database SQL.
@@ -119,11 +449,14 @@ These are the available commands:
119
449
  - `npm run graphene view <mdPath>` - Captures a screenshot of a given .md file, along with any errors encountered.
120
450
 
121
451
  ## AGENT INSTRUCTIONS
452
+
122
453
  Follow these guidelines when working in a Graphene project.
123
- - Before writing any GSQL queries, run them in the CLI first to make sure that the results make sense.
124
- - Do not redefine joins or expressions in a GSQL query that already exist in a semantic model. For example, if profit has already been defined as the stored expression `sum(revenue - cost) as profit` on the table `orders`, you can simply use it in a downstream query as `select profit from orders`.
125
- - Because all joins in Graphene are left outer joins, be mindful about your `from` table selection.
126
- - When adding a component to a .md file, read the associated documentation page first in /docs/data_apps/components so you understand all the available configurations.
454
+ - When formulating GSQL queries:
455
+ - First check all available stored expressions to see if there are any you can use. DO NOT redefine important business definitions like `profit` if they've already been modeled!
456
+ - Run your GSQL queries in the CLI first, _before_ you write them to a file. This way you can reason about the results to make sure they make sense.
127
457
  - Do not try to search the web for Graphene-specific info; you will not find anything. All the documentation is in /docs.
128
- - If you write to a .gsql file, run a syntax check with `npm run graphene check`.
129
- - If you write to a .md file, run a syntax check with `npm run graphene check`. Once there are no syntax errors, do a visual check by running `npm run graphene view <mdPath>` and looking at the .png it generates.
458
+ - When writing to a .gsql file, check your code with `npm run graphene check`.
459
+ - When writing to a Graphene .md file:
460
+ - First read ALL the linked component docs listed in [Components](#components) above.
461
+ - Check your code with `npm run graphene check`.
462
+ - Once there are no syntax errors, do a visual check by running `npm run graphene view <mdPath>` and looking at the .png it generates.
@@ -6,11 +6,9 @@ let staticErrors: Error[] = []
6
6
  let errorProviders: Record<string, ErrorProvider> = {}
7
7
 
8
8
  window.addEventListener('error', (event) => {
9
- console.log('recordedError')
10
9
  staticErrors.push(event.error)
11
10
  })
12
11
  window.addEventListener('unhandledrejection', (event) => {
13
- console.log('record unhandled')
14
12
  staticErrors.push(event.reason)
15
13
  })
16
14
 
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "main": "cli.ts",
4
4
  "type": "module",
5
5
  "author": "Graphene Systems Inc",
6
- "version": "0.0.2",
6
+ "version": "0.0.3",
7
7
  "license": "Elastic-2.0",
8
8
  "engines": {
9
9
  "node": ">=16"
@@ -25,9 +25,9 @@
25
25
  "dependencies": {
26
26
  "@duckdb/node-api": "1.3.2-alpha.26",
27
27
  "@google-cloud/bigquery": "^8.1.1",
28
+ "@graphenedata/malloy": "0.0.304",
28
29
  "@lezer/common": "^1.2.3",
29
30
  "@lezer/lr": "^1.4.2",
30
- "@graphenedata/malloy": "0.0.304",
31
31
  "@sveltejs/vite-plugin-svelte": "3.1.2",
32
32
  "@tidyjs/tidy": "^2.5.2",
33
33
  "chalk": "^5.3.0",
@@ -36,7 +36,6 @@
36
36
  "cli-table3": "^0.6.3",
37
37
  "commander": "^11.0.0",
38
38
  "debounce": "^1.2.1",
39
- "dompurify": "^3.2.7",
40
39
  "echarts": "^5.5.0",
41
40
  "fs-extra": "11.2.0",
42
41
  "glob": "^11.0.3",
@@ -44,10 +43,7 @@
44
43
  "marked": "^16.3.0",
45
44
  "mdsvex": "^0.12.6",
46
45
  "nanoid": "3.3.8",
47
- "rehype-stringify": "^10.0.1",
48
- "remark": "^15.0.1",
49
- "remark-mdx": "^3.1.1",
50
- "remark-rehype": "^11.1.2",
46
+ "sanitize-html": "^2.17.0",
51
47
  "ssf": "^0.11.2",
52
48
  "svelte": "4.2.19",
53
49
  "unist-util-visit": "4.1.2",
@@ -57,6 +53,7 @@
57
53
  "devDependencies": {
58
54
  "@types/fs-extra": "^11.0.4",
59
55
  "@types/node": "^20.0.0",
56
+ "@types/sanitize-html": "^2.16.0",
60
57
  "@types/ws": "^8.18.1",
61
58
  "esbuild": "^0.21.5",
62
59
  "vitest": "3.0.5",
@@ -1,3 +0,0 @@
1
- <main>
2
- <slot />
3
- </main>