npm - @graphenedata/cli - Versions diffs - 0.0.2 → 0.0.4 - Mend

@graphenedata/cli 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/cli/cli.js +127 -58
package/dist/docs/graphene.md +377 -44
package/dist/ui/internal/telemetry.ts +0 -2
package/package.json +4 -7
package/dist/ui/layout.svelte +0 -3

package/dist/cli/cli.js CHANGED Viewed

@@ -555,9 +555,9 @@ function analyzeQuery(queryNode) {
   isAgg ||= !!isSelectDistinct;
   selects.forEach((s) => {
     if (s.getChild("Wildcard")) {
-      let path7 = s.getChild("Wildcard").getChildren("Identifier");
-      let pathStrings = path7.map((p) => txt(p));
-      let target = followJoins(path7, scope.table);
+      let path8 = s.getChild("Wildcard").getChildren("Identifier");
+      let pathStrings = path8.map((p) => txt(p));
+      let target = followJoins(path8, scope.table);
       if (!target) return;
       target.fields.forEach((f) => {
         if (isJoin(f) || f.isAgg) return;
@@ -655,8 +655,8 @@ function analyzeExpression(expr, scope) {
       if (scope.outputFields.includes(field) && field.isAgg) {
         return { node: "outputField", name: field.name, ...typeInfo, isAgg: field.isAgg };
       }
-      let path7 = expr.getChildren("Identifier").map((i) => txt(i));
-      return { node: "field", path: path7, ...typeInfo, isAgg: field.isAgg };
+      let path8 = expr.getChildren("Identifier").map((i) => txt(i));
+      return { node: "field", path: path8, ...typeInfo, isAgg: field.isAgg };
     }
     case "ExtractExpression": {
       let e = analyzeExpression(expr.getChild("Expression"), scope);
@@ -1215,11 +1215,11 @@ async function loadWorkspace(dir, includeMd) {
     updateFile(contents, file);
   }
 }
-function updateFile(contents, path7) {
-  FILE_MAP[path7] ||= { path: path7, contents, tree: null, tables: [], queries: [] };
-  FILE_MAP[path7].contents = contents;
-  FILE_MAP[path7].tree = null;
-  return FILE_MAP[path7];
+function updateFile(contents, path8) {
+  FILE_MAP[path8] ||= { path: path8, contents, tree: null, tables: [], queries: [] };
+  FILE_MAP[path8].contents = contents;
+  FILE_MAP[path8].tree = null;
+  return FILE_MAP[path8];
 }
 function analyze(contents, type) {
   clearDiagnostics();
@@ -1294,6 +1294,11 @@ var init_bigQuery = __esm({
     BigQueryConnection = class {
       client;
       constructor(options = {}) {
+        if (process.env.GOOGLE_CREDENTIALS_CONTENT) {
+          let parsed = JSON.parse(process.env.GOOGLE_CREDENTIALS_CONTENT);
+          options.projectId = parsed.project_id;
+          options.credentials = parsed;
+        }
         options.projectId ||= config.googleProjectId;
         options.maxRetries ||= 3;
         options.userAgent ||= "Graphene";
@@ -1385,6 +1390,96 @@ var init_connections = __esm({
   }
 });
+// mdCompile.ts
+import fs4 from "fs";
+import path5 from "path";
+import { visit } from "unist-util-visit";
+import sanitizeHtml from "sanitize-html";
+function extractQueries() {
+  function escapeHtml(str) {
+    return str.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+  }
+  return function transformer(tree) {
+    visit(tree, "code", (node, index, parent) => {
+      if (index === null) return;
+      let name = typeof node.meta === "string" ? node.meta : "";
+      let code = typeof node.value === "string" ? node.value.trim() : "";
+      parent.children[index] = { type: "html", value: `<GrapheneQuery name="${escapeHtml(name)}" code="${escapeHtml(code)}" />` };
+    });
+  };
+}
+function escapeAngles() {
+  return function transformer(tree) {
+    visit(tree, "text", (node) => {
+      if (!node.value || typeof node.value !== "string") return;
+      if (!node.value.includes("<")) return;
+      node.value = node.value.replace(/</g, "&lt;");
+    });
+  };
+}
+function sanitizeMarkdown() {
+  return function transformer(tree) {
+    visit(tree, "raw", (node) => {
+      if (typeof node.value !== "string") return;
+      let expanded = node.value.replace(/<(\w+)((?:\s[^<>]*?)?)\s*\/>/gi, (_, name, attrs = "") => {
+        let spacing = attrs;
+        return `<${name}${spacing}></${name}>`;
+      });
+      let sanitized = sanitizeHtml(expanded, {
+        ...sanitizeHtml.defaults,
+        allowedTags: [
+          ...sanitizeHtml.defaults.allowedTags,
+          ...componentNames()
+        ],
+        allowedAttributes: {
+          ...sanitizeHtml.defaults.allowedAttributes,
+          ...Object.fromEntries(componentNames().map((n) => [n, ["*"]]))
+        },
+        parser: {
+          ...sanitizeHtml.defaults.parser || {},
+          lowerCaseAttributeNames: false,
+          lowerCaseTags: false
+        }
+      });
+      node.value = sanitized;
+    });
+  };
+}
+function injectComponentImports() {
+  let imp = `const {${componentNames().join(", ")}} = window.$GRAPHENE.components`;
+  return {
+    markup: ({ content, filename }) => {
+      if (!filename.endsWith(".md")) return;
+      if (content.includes("<script>")) {
+        content = content.replace("<script>", `<script>
+${imp}`);
+      } else {
+        content = `<script>
+${imp}
+</script>
+${content}`;
+      }
+      return { code: content };
+    },
+    style: () => {
+    },
+    script: () => {
+    }
+  };
+}
+function componentNames() {
+  if (cachedComponentNames) return cachedComponentNames;
+  let files = fs4.readdirSync(path5.join(import.meta.dirname, "../ui/components"));
+  cachedComponentNames = files.map((f) => path5.basename(f, ".svelte")).filter((f) => !f.startsWith("_"));
+  return cachedComponentNames || [];
+}
+var cachedComponentNames;
+var init_mdCompile = __esm({
+  "mdCompile.ts"() {
+    cachedComponentNames = null;
+  }
+});
 // serve2.ts
 var serve2_exports = {};
 __export(serve2_exports, {
@@ -1393,19 +1488,18 @@ __export(serve2_exports, {
 });
 import { createServer, optimizeDeps } from "vite";
 import { svelte, vitePreprocess } from "@sveltejs/vite-plugin-svelte";
-import { visit } from "unist-util-visit";
-import fs4 from "fs-extra";
+import fs5 from "fs-extra";
 import crypto from "crypto";
 import { mdsvex } from "mdsvex";
-import path5 from "path";
+import path6 from "path";
 import { fileURLToPath as fileURLToPath2 } from "url";
 import { WebSocketServer } from "ws";
 import { spawn as spawn2 } from "child_process";
 async function serve2() {
   grapheneRoot = config.root;
-  uiRoot = path5.join(fileURLToPath2(import.meta.url), "../../ui");
-  await fs4.ensureDir(path5.resolve(grapheneRoot, "node_modules/.graphene"));
-  await fs4.writeFile(path5.resolve(grapheneRoot, `node_modules/.graphene/${process.env.NODE_ENV == "test" ? "test" : "serve"}.pid`), String(process.pid));
+  uiRoot = path6.join(fileURLToPath2(import.meta.url), "../../ui");
+  await fs5.ensureDir(path6.resolve(grapheneRoot, "node_modules/.graphene"));
+  await fs5.writeFile(path6.resolve(grapheneRoot, `node_modules/.graphene/${process.env.NODE_ENV == "test" ? "test" : "serve"}.pid`), String(process.pid));
   let server = await createServer({
     root: config.root,
     plugins: [
@@ -1415,8 +1509,8 @@ async function serve2() {
           vitePreprocess(),
           mdsvex({
             extensions: [".md"],
-            remarkPlugins: [extractQueries],
-            layout: path5.resolve(uiRoot, "layout.svelte")
+            remarkPlugins: [extractQueries, escapeAngles],
+            rehypePlugins: [sanitizeMarkdown]
           }),
           injectComponentImports()
         ]
@@ -1432,7 +1526,7 @@ async function serve2() {
     },
     resolve: {
       alias: {
-        graphene: path5.resolve(uiRoot, "web.js")
+        graphene: path6.resolve(uiRoot, "web.js")
       }
     }
   });
@@ -1497,7 +1591,7 @@ async function handlePage(server, res, filePath, mount) {
   res.setHeader("Content-Type", "text/html");
   let mdMount = mount ? `
     import Page from ${JSON.stringify(filePath)};
-    new Page({ target: document.getElementById('app'), props: {} })
+    new Page({ target: document.getElementById('content'), props: {} })
   ` : "";
   let html = await server.transformIndexHtml(filePath, `<!doctype html>
   <html lang="en">
@@ -1511,7 +1605,9 @@ async function handlePage(server, res, filePath, mount) {
       <link href="https://fonts.googleapis.com/css2?family=Inter:wght@100..900&display=swap" rel="stylesheet">
     </head>
     <body>
-      <div id="app"></div>
+      <main>
+        <div id="content"></div>
+      </main>
       <script type="module">
         // do this first so we can track errors caused by importing the md file
         import 'graphene'
@@ -1523,34 +1619,6 @@ async function handlePage(server, res, filePath, mount) {
   </html>`);
   return res.end(html);
 }
-function extractQueries() {
-  function escapeHtml(str) {
-    return str.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
-  }
-  return function transformer(tree) {
-    visit(tree, "code", (node, index, parent) => {
-      if (index === null) return;
-      parent.children[index] = { type: "html", value: `<GrapheneQuery name="${escapeHtml(node.meta)}" code="${escapeHtml(node.value.trim())}" />` };
-    });
-  };
-}
-function injectComponentImports() {
-  let files = fs4.readdirSync(path5.join(uiRoot, "components"));
-  let componentNames = files.map((f) => path5.basename(f, ".svelte")).filter((f) => !f.startsWith("_"));
-  let imp = `const {${componentNames.join(", ")}} = window.$GRAPHENE.components`;
-  return {
-    markup: ({ content, filename }) => {
-      if (!filename.endsWith(".md")) return;
-      content = content.replace("<script>", `<script>
-${imp}`);
-      return { code: content };
-    },
-    style: () => {
-    },
-    script: () => {
-    }
-  };
-}
 function mockFilesForTests() {
   if (process.env.NODE_ENV !== "test") return null;
   return {
@@ -1570,6 +1638,7 @@ var init_serve2 = __esm({
   "serve2.ts"() {
     init_core();
     init_connections();
+    init_mdCompile();
     updateWorkspacePlugin = {
       name: "updateWorkspace",
       configureServer: (s) => {
@@ -1611,8 +1680,8 @@ var init_serve2 = __esm({
             if (pathName == "/graphene/view") return await handleView(req, res);
             if (pathName == "/__ct") return await handlePage(s, res, "__ct", false);
             if (!pathName || pathName == "/") pathName = "index";
-            let mdPath = path5.join(grapheneRoot, pathName + ".md");
-            if (await fs4.exists(mdPath)) {
+            let mdPath = path6.join(grapheneRoot, pathName + ".md");
+            if (await fs5.exists(mdPath)) {
               await handlePage(s, res, mdPath, true);
             } else {
               next();
@@ -1694,8 +1763,8 @@ function printTable(rows) {
 // cli.ts
 init_core();
 init_config();
-import fs5 from "fs-extra";
-import path6 from "path";
+import fs6 from "fs-extra";
+import path7 from "path";
 import os from "os";
 // background.ts
@@ -1848,9 +1917,9 @@ program.command("view").description("Capture a screenshot of a rendered markdown
   }
   if (result.screenshot) {
     let filename = `graphene-screenshot-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}.png`;
-    let screenshotPath = path6.join(os.tmpdir(), filename);
+    let screenshotPath = path7.join(os.tmpdir(), filename);
     let base64Data = result.screenshot.replace(/^data:image\/png;base64,/, "");
-    await fs5.writeFile(screenshotPath, base64Data, "base64");
+    await fs6.writeFile(screenshotPath, base64Data, "base64");
     console.log("Screenshot saved to", screenshotPath);
   }
 });
@@ -1865,9 +1934,9 @@ async function readInput(arg) {
       process.stdin.resume();
     });
   }
-  let absolutePath = path6.resolve(arg);
-  if (fs5.existsSync(absolutePath)) {
-    return await fs5.promises.readFile(absolutePath, "utf-8");
+  let absolutePath = path7.resolve(arg);
+  if (fs6.existsSync(absolutePath)) {
+    return await fs6.promises.readFile(absolutePath, "utf-8");
   }
   return arg;
 }

package/dist/docs/graphene.md CHANGED Viewed

@@ -1,86 +1,410 @@
 # How to develop in Graphene
-Graphene is a framework for building semantic layers and data visualizations in code. Graphene projects are comprised of:
+Graphene is a framework for data analysis, semantic modeling, and data visualization in code. Graphene projects are comprised of:
 - .gsql files that define semantics-enriched tables (aka semantic models)
-- .md files that define data apps (dashboards)
+- .md files that define data apps (aka dashboards)
 Graphene also has a CLI that lets you check syntax, run queries, serve data apps, and more.
 ## Graphene SQL (GSQL)
-### Tables
-Tables have to be declared first before they can be queried. A table in Graphene has the added concept of _semantics_. Semantics are stored expressions and join relationships associated with a table that `select` queries can leverage. This allows query logic to be centralized, reusable, and more easily governed.
+GSQL is comprised of `table` statements that declare tables and `select` statements that query them.
-Here's an example:
+### `table` statements
-```gsql
+`table` statements declare tables that already exist in your database. Here's an example of two tables, `orders` and `users`, in GSQL.
+```sql
 table orders (
+  /* Base columns */
   id BIGINT primary_key,
   user_id BIGINT,
   created_at DATETIME,
-  amount FLOAT, -- paid by customer #units=usd
-  cost FLOAT, -- cost of materials #units=usd
+  status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
+  amount FLOAT, -- Amount paid by customer
+  cost FLOAT, -- Cost of materials
+  /* Join relationships */
   join_one users on user_id = users.id,
-  sum(amount) as revenue,
-  sum(amount - cost) as profit,
+  /* Scalar expressions */
+  status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
+  /* Agg expressions */
+  sum(case when revenue_recognized then amount else 0 end) as revenue,
+  sum(case when revenue_recognized then cost else 0 end) as cogs,
+  revenue - cogs as profit,
   profit / revenue as profit_margin
-);
+)
 table users (
   id BIGINT primary_key,
   name VARCHAR,
   email VARCHAR,
   age INTEGER,
+  country_code VARCHAR,
   join_many orders on id = orders.user_id
-);
+)
 ```
-Syntax notes
-- `table foo (...)` defines a Graphene table based on the database table `foo`.
-- The allowed join types are `join_one` and `join_many`. All joins are left outer joins. There is no inner, right, or cross join.
+We can break down a table statement into three parts: [base columns](#base-columns-required), [join relationships](#join-relationships), and [stored expressions](#stored-expressions) (aka dimensions and measures).
+#### Base columns (required)
+The base column set is simply a reflection of the underlying database table's schema. Similar to `create table` statements in regular SQL DDL, you list each column's name and data type. One column must be designated as the primary key.
+#### Join relationships
+Join relationships in a `table` statement declare joins that can be used when querying them. This makes query writing easier and more foolproof. See [Using join relationships in queries](#using-join-relationships-in-queries) below for how to use modeled joins in queries.
+The other main difference between joins in GSQL and regular SQL is that you have to specify whether there are many rows in the left table for each row in the right table, or vice versa. This additional bit of information allows Graphene to prevent incorrect aggregation as a result of row duplication (aka fan-out) through joins. See [Safe aggregation in fan-outs](#safe-aggregation-in-fan-outs) for more details.
+This information is provided with the two supported join types, `join_one` and `join_many`:
 - `join_one` is used if there are many rows in the **left** table for each row in the **right** table.
 - `join_many` is used if there are many rows in the **right** table for each row in the **left** table.
-- Join names within a table must be unique. Polymorphic relationships (eg., where there are multiple relationships between the same two tables on different keys) are allowed but must be aliased eg. `join_one users as owner on user_id = owner.id` and `join_one users as viewer on user_id = viewer.id`.
-- Comments in tables can provide descriptions as well as metadata (denoted by `#` inside the comment).
-Best practices
-- For a given table, only model joins that are directly on that table. Graphene will automatically traverse multi-hop joins when it compiles the collective table space.
-- A join between two tables should be modeled in both the respective `table` statements. This may seem redundant but it offers more flexibility for queries to choose which table to set in the `from` (remember that direction matters since all joins are left joins).
+In the example above with `orders` and `users`, the joins confirm that there are many orders per user, and only one user per order.
+Note that all joins in GSQL are left outer joins. There is no inner, right, or cross join.
+##### Multiple join relationships between the same two tables
-### Queries
-Graphene tables can be queried using `select` statements. Here are some example queries on the tables above:
+Sometimes there are multiple valid ways to join two tables together. You can model this in Graphene by aliasing the various joins with `as`, just as you would in normal SQL. For example:
+```sql
+table projects (
+  ...
+  owner_id BIGINT,
+  viewer_id BIGINT,
+  join_one users as project_owner on owner_id = project_owner.id,
+  join_one users as project_viewer on viewer_id = project_viewer.id
+)
+table users (
+  ...
+  id BIGINT,
+  join_many projects as projects_as_owner on id = projects_as_owner.owner_id,
+  join_many projects as projects_as_viewer on id = projects_as_viewer.viewer_id
+)
 ```
--- top 10 customers by profit
-from orders select
-  users.name, -- notice how we can access the joined table without a join here
-  profit -- this expands into the stored expression defined in the table
+##### Best practices for modeling join relationships
+- For a given `table` statement, only model joins that are directly on that table. Multi-hop join paths do not need to be written explicitly in order for queries to traverse them.
+- A join between two tables should be modeled in both the respective `table` statements. This may seem redundant but it offers more flexibility for queries to choose which table to set in the `from` (remember that direction matters in queries since all joins are left joins).
+#### Stored expressions
+**Stored expressions** are GSQL expressions (ie. any arbitrary combination of functions, operators, and column references) that you want to make reusable to queries. Stored expressions are great for canonizing metrics, segments, and other important business definitions.
+A stored expression must be given a name via `as`. It can then be referenced by name in queries that use the parent table. See [Using stored expressions in queries](#using-stored-expressions-in-queries) below for how to use stored expressions in queries.
+Like expressions in regular SQL, expressions in GSQL are either scalar or aggregative. In BI parlance, these would be called dimensions and measures, respectively.
+Expressions can refer to other expressions, as shown below.
+```sql
+table orders (
+  ...
+  /* Scalar expressions */
+  status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
+  /* Agg expressions */
+  sum(case when revenue_recognized then amount else 0 end) as revenue,
+  sum(case when revenue_recognized then cost else 0 end) as cogs,
+  revenue - cogs as profit, -- even though there are no agg functions here, this is still aggregative as it references other aggregative expressions
+  profit / revenue as profit_margin
+)
+```
+### `select` statements
+`select` is how you write queries in Graphene SQL. It behaves similarly to regular SQL except in the following ways:
+- It can invoke join relationships and stored expressions from `table` statements.
+- It prevents users from accidentally aggregating incorrectly through joins.
+These differences are described in the sections below.
+#### Using join relationships in queries
+If a `table` has join relationships declared in it, a `select` query on that table can leverage that join without needing to write its own join statement. This is helpful for query writers who have not memorized all the correct join keys.
+If you recall the model from before:
+```sql
+table orders (
+  id BIGINT primary_key,
+  user_id BIGINT,
+  created_at DATETIME,
+  status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
+  amount FLOAT, -- Amount paid by customer
+  cost FLOAT, -- Cost of materials
+  join_one users on user_id = users.id,
+  status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
+  sum(case when revenue_recognized then amount else 0 end) as revenue,
+  sum(case when revenue_recognized then cost else 0 end) as cogs,
+  revenue - cogs as profit,
+  profit / revenue as profit_margin
+)
+table users (
+  id BIGINT primary_key,
+  name VARCHAR,
+  email VARCHAR,
+  age INTEGER,
+  country_code VARCHAR,
+  join_many orders on id = orders.user_id
+)
+```
+We can write a query that leverages the modeled join relationship between `orders` and `users`:
+```sql
+-- Top 10 customers by order count
+select
+  users.name, -- Use the dot operator to traverse the modeled join relationship
+  count(*)
+from orders -- A join statement here is not needed
+group by 1
 order by 2 desc
 limit 10
 ```
+##### Multi-hop joins
+Sometimes you need to access columns or stored expressions in a table that is two or more joins away from the `from` table. To do this, simply use more dot operators to trace the desired join path. For example, say there is another table added to our project, `countries`:
+```sql
+table orders (
+  ...
+  join_one users on user_id = users.id
+)
+table users (
+  ...
+  join_many orders on id = orders.user_id,
+  join_one countries on country_code = countries.code
+)
+table countries (
+  code VARCHAR primary_key,
+  name VARCHAR,
+  currency VARCHAR,
+  free_shipping BOOLEAN,
+  join_many users on code = users.country_code
+)
 ```
--- average age of customers over time
+We can write the following query to show the top ten countries by order count:
+```sql
+-- Top 10 countries by order count
 select
-  month(date),
-  average(users.age), -- in normal SQL this would fan-out in the join; in Graphene it smartly de-duplicates the fan-out when computing aggregates
+  users.countries.name, -- Orders -> Users -> Countries
+  count(*)
 from orders
+group by 1
+order by 2 desc
+limit 10
 ```
-Syntax notes
-- Columns and stored expressions from joined tables can be accessed with the dot operator, eg. `users.age` in the example above. Multiple join hops can be traversed with multiple dots, eg. `users.countries.country_code`.
-- `join_one` and `join_many` work here, too. This is useful if the join you need has not been modeled already.
-- The `from`, `select`, `group by`, and `where` clauses can be written in any order.
-- Expressions in `group by` are implicitly selected, so `from orders select avg(amount) group by user_id` is valid.
-- `group by all` is implied if aggregate and scalar expressions are both present in the `select`. It can be omitted and the query will still effectively execute the `group by all`.
+#### Using stored expressions in queries
+A stored expression can be invoked in a query by simply referencing it by name.
+Again, using the model from before:
+```sql
+table orders (
+  id BIGINT primary_key,
+  user_id BIGINT,
+  created_at DATETIME,
+  status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
+  amount FLOAT, -- Amount paid by customer
+  cost FLOAT, -- Cost of materials
+  join_one users on user_id = users.id,
+  status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
+  sum(case when revenue_recognized then amount else 0 end) as revenue,
+  sum(case when revenue_recognized then cost else 0 end) as cogs,
+  revenue - cogs as profit,
+  profit / revenue as profit_margin
+)
+table users (
+  id BIGINT primary_key,
+  name VARCHAR,
+  email VARCHAR,
+  age INTEGER,
+  country_code VARCHAR,
+  join_many orders on id = orders.user_id
+)
+```
+We can count the number of orders that were revenue-recognized vs. not:
+```sql
+-- Number of revenue-recognized orders vs. not
+select
+  revenue_recognized, -- Stored expression in orders
+  count(*)
+from orders
+group by 1
+```
+This would be equivalent to:
+```sql
+select
+  status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
+  count(*)
+from orders
+group by 1
+```
+You can see that invoking a stored expression is like using a macro: the definition for the stored expression is effectively expanded in-line by Graphene when it runs the query.
+This is an important concept to understand when invoking stored expressions that are **aggregative** (ie. contain agg functions). Here's an example.
+```sql
+-- Profit by month
+select
+  date_trunc(created_at, month) as month,
+  profit
+from orders
+group by 1
+order by 1 asc
+```
+Note that, while `profit` looks like a column here, it is _not_ a column. That's because this query is equivalent to:
+```sql
+select
+  date_trunc(created_at, month) as month,
+  sum(case when revenue_recognized then amount else 0 end) - sum(case when revenue_recognized then cost else 0 end) as profit -- Profit is defined as revenue - cogs, which respectively expands out to these two filtered sums
+from orders
+group by 1
+order by 1 asc
+```
+For this reason, you should never wrap an aggregative stored expression in `sum()`, `avg()`, or any other agg function in a query, just as you would never write `sum(sum(foo))` in SQL. That would throw an error!
+#### Safe aggregation in fan-outs
+A common and dangerous user error in regular SQL is aggregating data incorrectly after joining tables. This can happen when rows of one table match multiple rows of another, and effectively get duplicated for each match.
+For example, after joining `users` to `orders`, your joined result will have some users repeated multiple times if they've made multiple purchases. If you wanted to find the average age of customers over this joined result, simply using an `avg(users.age)` would be _incorrect_, because you would be weighting the average towards users with multiple purchases, rather than taking the true average.
+GSQL aims to solve this problem. With the additional information provided via `join_one` and `join_many`, Graphene knows the scenarios in which row duplication occurs, and will rewrite aggregative expressions in a way that ignores the duplicate rows.
+The query `select avg(users.age) from orders` will be rewritten to the following SQL when Graphene queries the underlying database (this is for BigQuery, specifically):
+```sql
+SELECT
+   (CAST((
+    (
+      SUM(DISTINCT
+        (CAST(ROUND(COALESCE(users_0.`age`,0)*(1*1.0), 9) AS NUMERIC) +
+        (cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 1, 15)) as int64) as numeric) * 4294967296 + cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 16, 8)) as int64) as numeric)) * 0.000000001
+      ))
+      -
+       SUM(DISTINCT (cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 1, 15)) as int64) as numeric) * 4294967296 + cast(cast(concat('0x', substr(to_hex(md5(CAST(users_0.`id` AS STRING))), 16, 8)) as int64) as numeric)) * 0.000000001)
+    )/(1*1.0)) AS FLOAT64))/NULLIF(COUNT(DISTINCT CASE WHEN users_0.`age` IS NOT NULL THEN users_0.`id` END),0) as `col_0`
+FROM `bigquery-public-data.thelook_ecommerce.orders` as base
+ LEFT JOIN `bigquery-public-data.thelook_ecommerce.users` AS users_0
+  ON users_0.`id`=base.`user_id`
+```
+You don't have to understand this; the point is that GSQL is minimizing the chances that naive users aggregate data incorrectly.
+### `table as` statements
+You can turn the output of any `select` statement into a table with `table foo as (select ...)`. Here's an example of an additional table `user_facts` added to the two tables from earlier:
+```sql
+table orders (
+  id BIGINT primary_key,
+  user_id BIGINT,
+  created_at DATETIME,
+  status STRING, -- One of 'Processing', 'Shipped', 'Complete', 'Cancelled', 'Returned'
+  amount FLOAT, -- Amount paid by customer
+  cost FLOAT, -- Cost of materials
+  join_one users on user_id = users.id,
+  status in ('Processing', 'Shipped', 'Complete') as revenue_recognized,
+  sum(case when revenue_recognized then amount else 0 end) as revenue,
+  sum(case when revenue_recognized then cost else 0 end) as cogs,
+  revenue - cogs as profit,
+  profit / revenue as profit_margin
+)
+table users (
+  id BIGINT primary_key,
+  name VARCHAR,
+  email VARCHAR,
+  age INTEGER,
+  join_many orders on id = orders.user_id,
+  join_one user_facts on id = user_facts.id,
+  /* Scalar expressions */
+  user_facts.ltv as ltv,
+  user_facts.lifetime_orders as lifetime_orders
+)
+table user_facts as (
+  select
+    id,
+    orders.revenue as ltv,
+    count(orders.id) as lifetime_orders,
+  from users
+  group by id
+)
+```
+`table as` statements are conceptually the same as view tables in regular SQL. A few things to note:
+- You cannot yet declare join relationships or stored expressions directly in a `table as` statement. Other tables can declare join relationships to it, though, as shown above.
+- In the example above, the `ltv` and `lifetime_orders` columns from `user_facts` are "hoisted" back into `users` so that they appear as if they are columns from `users`. This is simply a design choice which allows query writers to never need to know about `user_facts`.
+### Other miscellaneous details about GSQL
+- Trailing commas in `table` statements are optional.
+- Trailing semicolons after `table` and `table as` statements are optional.
+- The clauses in a `select` statement (`select`, `from`, `join`, `group by`, etc.) can be written in any order. They cannot be repeated, however.
+- `group by all` is implied if aggregative and scalar expressions are both present in the `select` clause. This means that `group by` can be omitted and the query will still effectively execute the `group by all`.
+- Expressions in `group by` are implicitly selected, so `from orders select avg(amount) group by user_id` will return two columns.
 - `count` is a reserved word. Do not alias your columns as `count`.
+- Window functions and set operations are not supported.
+## Graphene visualizations
-## Graphene viz (.md)
-Graphene data apps are written in Markdown with components. Markdown files can contain named GSQL queries in code fences that components can then refer to. Those queries can use any tables defined in .gsql files.
+Graphene data apps are written in Markdown with the addition of special Graphene HTML components. Markdown files can contain named GSQL queries in code fences that components can then refer to. Those queries can use any tables defined in .gsql files.
 ````markdown
   # Order analysis
@@ -96,9 +420,14 @@ Graphene data apps are written in Markdown with components. Markdown files can c
   </Row>
 ````
-Note that components can also directly refer to Graphene tables in their `data` property; it is not always necessary to prepare data in a code-fenced query. Properties that take column references can also take whole expressions, as shown in the second line chart from the example above.
+Note that components can also directly refer to Graphene tables in their `data` property; it is not always necessary to prepare data in a code-fenced query. Properties that take column references can also take whole GSQL expressions, as shown in the second line chart from the example above.
+Best practices:
+- If you have multiple time series charts, align their x-axes to have the same range and granularity.
+- Use the same color for a given metric if it is used in multiple charts.
 ### Components
 The following components are available:
 - [BarChart](./data_apps/components/charts/bar-chart.md)
 - [LineChart](./data_apps/components/charts/line-chart.md)
@@ -111,6 +440,7 @@ The following components are available:
 - [TextInput](./data_apps/components/inputs/text-input.md)
 ## Using the Graphene CLI
 These are the available commands:
 - `npm run graphene check` - Checks the syntax for the entire Graphene project.
 - `npm run graphene compile "<GSQL>"` - Shows how GSQL is translated into the underlying database SQL.
@@ -119,11 +449,14 @@ These are the available commands:
 - `npm run graphene view <mdPath>` - Captures a screenshot of a given .md file, along with any errors encountered.
 ## AGENT INSTRUCTIONS
 Follow these guidelines when working in a Graphene project.
-- Before writing any GSQL queries, run them in the CLI first to make sure that the results make sense.
-- Do not redefine joins or expressions in a GSQL query that already exist in a semantic model. For example, if profit has already been defined as the stored expression `sum(revenue - cost) as profit` on the table `orders`, you can simply use it in a downstream query as `select profit from orders`.
-- Because all joins in Graphene are left outer joins, be mindful about your `from` table selection.
-- When adding a component to a .md file, read the associated documentation page first in /docs/data_apps/components so you understand all the available configurations.
+- When formulating GSQL queries:
+  - First check all available stored expressions to see if there are any you can use. DO NOT redefine important business definitions like `profit` if they've already been modeled!
+  - Run your GSQL queries in the CLI first, _before_ you write them to a file. This way you can reason about the results to make sure they make sense.
 - Do not try to search the web for Graphene-specific info; you will not find anything. All the documentation is in /docs.
-- If you write to a .gsql file, run a syntax check with `npm run graphene check`.
-- If you write to a .md file, run a syntax check with `npm run graphene check`. Once there are no syntax errors, do a visual check by running `npm run graphene view <mdPath>` and looking at the .png it generates.
+- When writing to a .gsql file, check your code with `npm run graphene check`.
+- When writing to a Graphene .md file:
+  - First read ALL the linked component docs listed in [Components](#components) above.
+  - Check your code with `npm run graphene check`.
+  - Once there are no syntax errors, do a visual check by running `npm run graphene view <mdPath>` and looking at the .png it generates.

package/dist/ui/internal/telemetry.ts CHANGED Viewed

@@ -6,11 +6,9 @@ let staticErrors: Error[] = []
 let errorProviders: Record<string, ErrorProvider> = {}
 window.addEventListener('error', (event) => {
-  console.log('recordedError')
   staticErrors.push(event.error)
 })
 window.addEventListener('unhandledrejection', (event) => {
-  console.log('record unhandled')
   staticErrors.push(event.reason)
 })

package/package.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "main": "cli.ts",
   "type": "module",
   "author": "Graphene Systems Inc",
-  "version": "0.0.2",
+  "version": "0.0.4",
   "license": "Elastic-2.0",
   "engines": {
     "node": ">=16"
@@ -25,9 +25,9 @@
   "dependencies": {
     "@duckdb/node-api": "1.3.2-alpha.26",
     "@google-cloud/bigquery": "^8.1.1",
+    "@graphenedata/malloy": "0.0.304",
     "@lezer/common": "^1.2.3",
     "@lezer/lr": "^1.4.2",
-    "@graphenedata/malloy": "0.0.304",
     "@sveltejs/vite-plugin-svelte": "3.1.2",
     "@tidyjs/tidy": "^2.5.2",
     "chalk": "^5.3.0",
@@ -36,7 +36,6 @@
     "cli-table3": "^0.6.3",
     "commander": "^11.0.0",
     "debounce": "^1.2.1",
-    "dompurify": "^3.2.7",
     "echarts": "^5.5.0",
     "fs-extra": "11.2.0",
     "glob": "^11.0.3",
@@ -44,10 +43,7 @@
     "marked": "^16.3.0",
     "mdsvex": "^0.12.6",
     "nanoid": "3.3.8",
-    "rehype-stringify": "^10.0.1",
-    "remark": "^15.0.1",
-    "remark-mdx": "^3.1.1",
-    "remark-rehype": "^11.1.2",
+    "sanitize-html": "^2.17.0",
     "ssf": "^0.11.2",
     "svelte": "4.2.19",
     "unist-util-visit": "4.1.2",
@@ -57,6 +53,7 @@
   "devDependencies": {
     "@types/fs-extra": "^11.0.4",
     "@types/node": "^20.0.0",
+    "@types/sanitize-html": "^2.16.0",
     "@types/ws": "^8.18.1",
     "esbuild": "^0.21.5",
     "vitest": "3.0.5",

package/dist/ui/layout.svelte DELETED Viewed

@@ -1,3 +0,0 @@
-<main>
-  <slot />
-</main>