@revos/cli 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -13
- package/dist/adapters/oclif/commands/auth/login.mjs +2 -2
- package/dist/adapters/oclif/commands/auth/logout.mjs +2 -2
- package/dist/adapters/oclif/commands/auth/status.mjs +2 -2
- package/dist/adapters/oclif/commands/init.mjs +2 -2
- package/dist/adapters/oclif/commands/org/current.mjs +2 -2
- package/dist/adapters/oclif/commands/org/list.mjs +2 -2
- package/dist/adapters/oclif/commands/org/switch.mjs +2 -2
- package/dist/adapters/oclif/commands/overlays/diff.d.mts +1 -1
- package/dist/adapters/oclif/commands/overlays/diff.mjs +3 -3
- package/dist/adapters/oclif/commands/overlays/pull.d.mts +1 -1
- package/dist/adapters/oclif/commands/overlays/pull.mjs +3 -3
- package/dist/adapters/oclif/commands/overlays/push.d.mts +1 -1
- package/dist/adapters/oclif/commands/overlays/push.mjs +3 -3
- package/dist/adapters/oclif/commands/overlays/status.d.mts +1 -1
- package/dist/adapters/oclif/commands/overlays/status.mjs +3 -3
- package/dist/{base.command-DDSLyx5v.mjs → base.command-DlVQ9Cqa.mjs} +1 -1
- package/dist/{core-EJgxP-x5.mjs → core-gKJ_V-K5.mjs} +43 -18
- package/dist/{index-DH6vy050.d.mts → index-B8n2GxTc.d.mts} +1 -1
- package/dist/index.d.mts +3 -3
- package/dist/index.mjs +1 -1
- package/dist/templates/AGENTS.md +1 -1
- package/dist/templates/dbt/profiles.yml +12 -0
- package/dist/templates/gitignore +19 -0
- package/dist/templates/skills/create-dbt-transformations/SKILL.md +214 -0
- package/dist/templates/skills/create-dbt-transformations/references/edge-cases.md +46 -0
- package/dist/templates/skills/create-dbt-transformations/references/schema-conventions.md +128 -0
- package/dist/templates/skills/create-dbt-transformations/references/sql-templates.md +73 -0
- package/dist/templates/skills/create-semantic-model/SKILL.md +126 -1432
- package/dist/templates/skills/create-semantic-model/references/cube-examples.md +267 -0
- package/dist/templates/skills/create-semantic-model/references/key-patterns.md +150 -0
- package/dist/templates/skills/create-semantic-model/references/validation-queries.md +209 -0
- package/dist/templates/skills/explore-lakehouse/SKILL.md +8 -1
- package/dist/{types-DZssnweO.d.mts → types-DmuJzN0Z.d.mts} +5 -1
- package/package.json +2 -1
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# Cube Overlay Examples
|
|
2
|
+
|
|
3
|
+
## Table of Contents
|
|
4
|
+
|
|
5
|
+
- [Standard Cube](#standard-cube)
|
|
6
|
+
- [Bridge / Junction Cube](#bridge--junction-cube)
|
|
7
|
+
- [Composite Primary Key](#composite-primary-key)
|
|
8
|
+
- [Join Direction Examples](#join-direction-examples)
|
|
9
|
+
- [Refresh Key Variants](#refresh-key-variants)
|
|
10
|
+
- [Type Mapping](#type-mapping)
|
|
11
|
+
- [Measure Suggestions](#measure-suggestions)
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Standard Cube
|
|
16
|
+
|
|
17
|
+
```yaml
|
|
18
|
+
name: hubspot_companies
|
|
19
|
+
sql_table: "`revos_1737556292084.gold_hubspot_companies`"
|
|
20
|
+
|
|
21
|
+
joins:
|
|
22
|
+
companies_deals:
|
|
23
|
+
sql: "${CUBE}.id = ${companies_deals}.company_id"
|
|
24
|
+
relationship: one_to_many
|
|
25
|
+
|
|
26
|
+
measures:
|
|
27
|
+
count:
|
|
28
|
+
type: count
|
|
29
|
+
|
|
30
|
+
total_deal_value:
|
|
31
|
+
sql: "${CUBE}.properties_hs_total_deal_value"
|
|
32
|
+
type: sum
|
|
33
|
+
|
|
34
|
+
dimensions:
|
|
35
|
+
id:
|
|
36
|
+
sql: "${CUBE}.id"
|
|
37
|
+
type: string
|
|
38
|
+
primary_key: true
|
|
39
|
+
|
|
40
|
+
airbyte_extracted_at:
|
|
41
|
+
sql: "${CUBE}._airbyte_extracted_at"
|
|
42
|
+
type: time
|
|
43
|
+
|
|
44
|
+
refresh_key:
|
|
45
|
+
sql: "SELECT MAX(_airbyte_extracted_at) FROM `revos_1737556292084.gold_hubspot_companies`"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Notes:
|
|
49
|
+
|
|
50
|
+
1. Cube `name` is `hubspot_companies` (no `gold_` prefix).
|
|
51
|
+
2. `sql_table` references `gold_hubspot_companies` (with `gold_` prefix), in backticks.
|
|
52
|
+
3. The join references `${companies_deals}` — the cube name of a bridge cube defined in `semantic/companies_deals.yml`.
|
|
53
|
+
4. Only `_airbyte_extracted_at` is exposed from Airbyte metadata, as `airbyte_extracted_at`.
|
|
54
|
+
5. `refresh_key.sql` uses the same fully qualified table name as `sql_table`.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Bridge / Junction Cube
|
|
59
|
+
|
|
60
|
+
```yaml
|
|
61
|
+
name: companies_deals
|
|
62
|
+
sql_table: "`revos_1737556292084.gold_companies_deals`"
|
|
63
|
+
public: false
|
|
64
|
+
|
|
65
|
+
joins:
|
|
66
|
+
hubspot_companies:
|
|
67
|
+
relationship: many_to_one
|
|
68
|
+
sql: "${CUBE}.company_id = ${hubspot_companies}.id"
|
|
69
|
+
|
|
70
|
+
hubspot_deals:
|
|
71
|
+
relationship: many_to_one
|
|
72
|
+
sql: "${CUBE}.deal_id = ${hubspot_deals}.id"
|
|
73
|
+
|
|
74
|
+
measures:
|
|
75
|
+
count:
|
|
76
|
+
type: count
|
|
77
|
+
|
|
78
|
+
dimensions:
|
|
79
|
+
id:
|
|
80
|
+
sql: "CONCAT(${CUBE}.deal_id, '-', ${CUBE}.company_id)"
|
|
81
|
+
type: string
|
|
82
|
+
primary_key: true
|
|
83
|
+
|
|
84
|
+
deal_id:
|
|
85
|
+
sql: "${CUBE}.deal_id"
|
|
86
|
+
type: string
|
|
87
|
+
|
|
88
|
+
company_id:
|
|
89
|
+
sql: "${CUBE}.company_id"
|
|
90
|
+
type: string
|
|
91
|
+
|
|
92
|
+
airbyte_extracted_at:
|
|
93
|
+
sql: "${CUBE}._airbyte_extracted_at"
|
|
94
|
+
type: time
|
|
95
|
+
|
|
96
|
+
refresh_key:
|
|
97
|
+
sql: "SELECT MAX(_airbyte_extracted_at) FROM `revos_1737556292084.gold_companies_deals`"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
If the bridge model lacks `_airbyte_extracted_at`, omit that dimension and use:
|
|
101
|
+
|
|
102
|
+
```yaml
|
|
103
|
+
refresh_key:
|
|
104
|
+
every: 1 hour
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Composite Primary Key
|
|
110
|
+
|
|
111
|
+
Cube allows exactly one `primary_key: true` per cube. For composite keys, create a synthetic dimension:
|
|
112
|
+
|
|
113
|
+
```yaml
|
|
114
|
+
dimensions:
|
|
115
|
+
id:
|
|
116
|
+
sql: "CONCAT(${CUBE}.office_unique_id, '-', ${CUBE}.month)"
|
|
117
|
+
type: string
|
|
118
|
+
primary_key: true
|
|
119
|
+
|
|
120
|
+
office_unique_id:
|
|
121
|
+
sql: "${CUBE}.office_unique_id"
|
|
122
|
+
type: string
|
|
123
|
+
|
|
124
|
+
month:
|
|
125
|
+
sql: "${CUBE}.month"
|
|
126
|
+
type: time
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Choose a separator that does not appear in component values. `-` is usually safe; use `||` if components may contain `-`.
|
|
130
|
+
|
|
131
|
+
Joins to this cube must reference the synthetic `id`, not individual components.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Join Direction Examples
|
|
136
|
+
|
|
137
|
+
Direction is always from the perspective of the current cube.
|
|
138
|
+
|
|
139
|
+
### Direct many-to-one / one-to-many
|
|
140
|
+
|
|
141
|
+
```yaml
|
|
142
|
+
# In hubspot_deals.yml
|
|
143
|
+
joins:
|
|
144
|
+
hubspot_companies:
|
|
145
|
+
sql: "${CUBE}.company_id = ${hubspot_companies}.id"
|
|
146
|
+
relationship: many_to_one
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
```yaml
|
|
150
|
+
# In hubspot_companies.yml
|
|
151
|
+
joins:
|
|
152
|
+
hubspot_deals:
|
|
153
|
+
sql: "${CUBE}.id = ${hubspot_deals}.company_id"
|
|
154
|
+
relationship: one_to_many
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Connector path (products -> clients -> addresses)
|
|
158
|
+
|
|
159
|
+
```yaml
|
|
160
|
+
# In products.yml
|
|
161
|
+
joins:
|
|
162
|
+
clients:
|
|
163
|
+
sql: "${CUBE}.client_id = ${clients}.id"
|
|
164
|
+
relationship: many_to_one
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
```yaml
|
|
168
|
+
# In clients.yml
|
|
169
|
+
joins:
|
|
170
|
+
products:
|
|
171
|
+
sql: "${CUBE}.id = ${products}.client_id"
|
|
172
|
+
relationship: one_to_many
|
|
173
|
+
addresses:
|
|
174
|
+
sql: "${CUBE}.id = ${addresses}.client_id"
|
|
175
|
+
relationship: one_to_many
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
```yaml
|
|
179
|
+
# In addresses.yml
|
|
180
|
+
joins:
|
|
181
|
+
clients:
|
|
182
|
+
sql: "${CUBE}.client_id = ${clients}.id"
|
|
183
|
+
relationship: many_to_one
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Bridge joins (both parents reference bridge)
|
|
187
|
+
|
|
188
|
+
```yaml
|
|
189
|
+
# In hubspot_companies.yml
|
|
190
|
+
joins:
|
|
191
|
+
companies_deals:
|
|
192
|
+
sql: "${CUBE}.id = ${companies_deals}.company_id"
|
|
193
|
+
relationship: one_to_many
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
```yaml
|
|
197
|
+
# In hubspot_deals.yml
|
|
198
|
+
joins:
|
|
199
|
+
companies_deals:
|
|
200
|
+
sql: "${CUBE}.id = ${companies_deals}.deal_id"
|
|
201
|
+
relationship: one_to_many
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Unvalidated join
|
|
205
|
+
|
|
206
|
+
```yaml
|
|
207
|
+
joins:
|
|
208
|
+
hubspot_companies:
|
|
209
|
+
# UNVALIDATED: match rate could not be measured because gold_hubspot_companies was not yet materialized
|
|
210
|
+
sql: "${CUBE}.company_id = ${hubspot_companies}.id"
|
|
211
|
+
relationship: many_to_one
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## Refresh Key Variants
|
|
217
|
+
|
|
218
|
+
Priority order:
|
|
219
|
+
|
|
220
|
+
```yaml
|
|
221
|
+
# 1. Airbyte timestamp (preferred)
|
|
222
|
+
refresh_key:
|
|
223
|
+
sql: "SELECT MAX(_airbyte_extracted_at) FROM `<dataset>.<gold_model>`"
|
|
224
|
+
|
|
225
|
+
# 2. Other reliable timestamp
|
|
226
|
+
refresh_key:
|
|
227
|
+
sql: "SELECT MAX(updated_at) FROM `<dataset>.<gold_model>`"
|
|
228
|
+
|
|
229
|
+
# 3. Default fallback
|
|
230
|
+
refresh_key:
|
|
231
|
+
every: 1 hour
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
`refresh_key.sql` must reference the same fully qualified table as the cube's `sql_table`.
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## Type Mapping
|
|
239
|
+
|
|
240
|
+
```text
|
|
241
|
+
STRING / VARCHAR / TEXT -> string
|
|
242
|
+
INTEGER / FLOAT / NUMERIC -> number
|
|
243
|
+
BOOLEAN / BOOL -> boolean
|
|
244
|
+
DATE / DATETIME / TIMESTAMP -> time
|
|
245
|
+
JSON / ARRAY / STRUCT -> string (or skip if not queryable)
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
## Measure Suggestions
|
|
251
|
+
|
|
252
|
+
Common measure patterns by column name:
|
|
253
|
+
|
|
254
|
+
```text
|
|
255
|
+
amount -> total_amount (sum), average_amount (avg)
|
|
256
|
+
revenue -> total_revenue (sum)
|
|
257
|
+
price -> total_price or average_price
|
|
258
|
+
cost -> total_cost (sum)
|
|
259
|
+
quantity -> total_quantity (sum)
|
|
260
|
+
duration -> average_duration (avg)
|
|
261
|
+
*_id -> count_distinct (only in the cube that owns the FK)
|
|
262
|
+
created_at -> first_created_at (min), last_created_at (max)
|
|
263
|
+
closed_at -> first_closed_at (min), last_closed_at (max)
|
|
264
|
+
updated_at -> last_updated_at (max)
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
`count_distinct` on FK columns: define inside the cube that owns the FK, not the parent cube. Joins can produce row fan-out that distorts distinct counts.
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# Key Detection Patterns
|
|
2
|
+
|
|
3
|
+
## Table of Contents
|
|
4
|
+
|
|
5
|
+
- [Primary Key Patterns](#primary-key-patterns)
|
|
6
|
+
- [Secondary Key Patterns](#secondary-key-patterns)
|
|
7
|
+
- [Foreign Key Patterns](#foreign-key-patterns)
|
|
8
|
+
- [JSON / Array Key Patterns](#json--array-key-patterns)
|
|
9
|
+
- [Schema Summary Output](#schema-summary-output)
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Primary Key Patterns
|
|
14
|
+
|
|
15
|
+
Common primary key column names:
|
|
16
|
+
|
|
17
|
+
```text
|
|
18
|
+
id
|
|
19
|
+
<entity>_id
|
|
20
|
+
<model_name>_id
|
|
21
|
+
uuid
|
|
22
|
+
unique_id
|
|
23
|
+
external_id
|
|
24
|
+
source_id
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Examples:
|
|
28
|
+
|
|
29
|
+
```text
|
|
30
|
+
companies.id
|
|
31
|
+
companies.company_id
|
|
32
|
+
companies.office_unique_id
|
|
33
|
+
hubspot_companies.properties_company_unique_id
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Rules:
|
|
37
|
+
|
|
38
|
+
1. Prefer a known business or platform identifier over a generated row number.
|
|
39
|
+
2. Prefer stable IDs over names or labels.
|
|
40
|
+
3. Do not mark a column as primary key only because it looks unique by name.
|
|
41
|
+
4. Validate uniqueness with SQL (see references/validation-queries.md, section 1).
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Secondary Key Patterns
|
|
46
|
+
|
|
47
|
+
Secondary keys are identifiers that are not the table primary key but can be used for joins, grouping, lookup, or `count_distinct` measures.
|
|
48
|
+
|
|
49
|
+
Common patterns:
|
|
50
|
+
|
|
51
|
+
```text
|
|
52
|
+
office_unique_id
|
|
53
|
+
company_id
|
|
54
|
+
customer_id
|
|
55
|
+
client_id
|
|
56
|
+
deal_id
|
|
57
|
+
contact_id
|
|
58
|
+
owner_id
|
|
59
|
+
user_id
|
|
60
|
+
account_id
|
|
61
|
+
product_id
|
|
62
|
+
address_id
|
|
63
|
+
external_id
|
|
64
|
+
source_id
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Rules:
|
|
68
|
+
|
|
69
|
+
1. Track secondary keys explicitly.
|
|
70
|
+
2. Secondary keys may be foreign keys to another entity.
|
|
71
|
+
3. Secondary keys should usually become Cube dimensions.
|
|
72
|
+
4. Secondary keys may support `count_distinct` measures if analytically useful, but only inside the cube that owns the FK (see Phase 7 caution about fan-out).
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## Foreign Key Patterns
|
|
77
|
+
|
|
78
|
+
Common patterns:
|
|
79
|
+
|
|
80
|
+
```text
|
|
81
|
+
<entity>_id
|
|
82
|
+
<entity>Id
|
|
83
|
+
fk_<entity>
|
|
84
|
+
associated_<entity>_id
|
|
85
|
+
parent_<entity>_id
|
|
86
|
+
owner_id
|
|
87
|
+
created_by_user_id
|
|
88
|
+
updated_by_user_id
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Also check JSON and array-based foreign keys:
|
|
92
|
+
|
|
93
|
+
```text
|
|
94
|
+
deals.companies -> companies.id
|
|
95
|
+
companies.deals -> deals.id
|
|
96
|
+
contacts.associated_company_ids -> companies.id
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## JSON / Array Key Patterns
|
|
102
|
+
|
|
103
|
+
Keys may be hidden inside JSON strings, JSON arrays, repeated fields, or nested structures. This is especially common for one-to-many and many-to-many relationships.
|
|
104
|
+
|
|
105
|
+
Common column names that may contain relationship keys:
|
|
106
|
+
|
|
107
|
+
```text
|
|
108
|
+
companies, deals, contacts, users, owners, clients, products, addresses
|
|
109
|
+
associations, associated_companies, associated_deals, associated_contacts
|
|
110
|
+
associated_clients, associated_products
|
|
111
|
+
company_ids, deal_ids, contact_ids, client_ids, product_ids, address_ids
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Example: `gold_hubspot_deals.companies` may contain an array of company IDs.
|
|
115
|
+
|
|
116
|
+
For JSON arrays, use `UNNEST(JSON_VALUE_ARRAY(...))`.
|
|
117
|
+
|
|
118
|
+
Rules:
|
|
119
|
+
|
|
120
|
+
1. Always inspect JSON, array, repeated, and nested fields for hidden relationship keys.
|
|
121
|
+
2. Do not assume relationship keys only exist as flat columns.
|
|
122
|
+
3. If JSON structure is unknown, inspect sample values first:
|
|
123
|
+
|
|
124
|
+
```sql
|
|
125
|
+
SELECT <json_or_array_column>
|
|
126
|
+
FROM `<dataset>.<gold_model>`
|
|
127
|
+
WHERE <json_or_array_column> IS NOT NULL
|
|
128
|
+
LIMIT 20;
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
4. If a relationship is stored as an array of IDs, use or create an approved bridge/support model. Bridge model creation is delegated to `create-dbt-transformations`.
|
|
132
|
+
5. Bridge models should preserve both sides of the relationship as keys.
|
|
133
|
+
6. Bridge and junction cubes should use `public: false` where the project convention supports it.
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Schema Summary Output
|
|
138
|
+
|
|
139
|
+
After analysis, summarize each selected model in this format:
|
|
140
|
+
|
|
141
|
+
```text
|
|
142
|
+
Model: gold_hubspot_deals
|
|
143
|
+
Columns: 18
|
|
144
|
+
Candidate primary key: deal_id
|
|
145
|
+
Secondary keys: company_id, owner_id
|
|
146
|
+
JSON / array relationship columns: companies, contacts
|
|
147
|
+
Time columns: created_at, updated_at, closed_at
|
|
148
|
+
Numeric metric-like columns: amount
|
|
149
|
+
Airbyte columns present: _airbyte_extracted_at (will be exposed), _airbyte_raw_id, _airbyte_meta, _airbyte_generation_id (will be excluded by default)
|
|
150
|
+
```
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# Join Validation SQL Templates
|
|
2
|
+
|
|
3
|
+
All queries use a `<dataset>` placeholder. Before executing a query, replace the placeholder with the literal dataset name resolved at the start of Phase 2.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
|
|
7
|
+
- [1. Key Uniqueness](#1-key-uniqueness)
|
|
8
|
+
- [2. Many-to-One Direction](#2-many-to-one-direction)
|
|
9
|
+
- [3. Reverse One-to-Many Direction](#3-reverse-one-to-many-direction)
|
|
10
|
+
- [4. One-to-One Relationships](#4-one-to-one-relationships)
|
|
11
|
+
- [5. Many-to-Many Through Bridge](#5-many-to-many-through-bridge)
|
|
12
|
+
- [6. JSON / Array Relationships](#6-json--array-relationships)
|
|
13
|
+
- [7. Type Compatibility](#7-type-compatibility)
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## 1. Key Uniqueness
|
|
18
|
+
|
|
19
|
+
```sql
|
|
20
|
+
SELECT
|
|
21
|
+
COUNT(*) AS total_rows,
|
|
22
|
+
COUNT(DISTINCT <candidate_pk>) AS distinct_keys,
|
|
23
|
+
COUNT(*) - COUNT(DISTINCT <candidate_pk>) AS duplicate_count
|
|
24
|
+
FROM `<dataset>.<gold_model>`;
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
A primary key should have `duplicate_count = 0`. If duplicates exist, do not mark the column as `primary_key: true` unless the reason for the duplicates is clearly documented.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## 2. Many-to-One Direction
|
|
32
|
+
|
|
33
|
+
Example: `deals.company_id -> companies.id` (many_to_one).
|
|
34
|
+
|
|
35
|
+
Validate FK match rate:
|
|
36
|
+
|
|
37
|
+
```sql
|
|
38
|
+
SELECT
|
|
39
|
+
COUNT(*) AS total_rows_with_fk,
|
|
40
|
+
COUNT(c.id) AS matched_rows,
|
|
41
|
+
COUNT(*) - COUNT(c.id) AS unmatched_rows,
|
|
42
|
+
ROUND(100.0 * COUNT(c.id) / COUNT(*), 2) AS match_percentage
|
|
43
|
+
FROM `<dataset>.gold_hubspot_deals` d
|
|
44
|
+
LEFT JOIN `<dataset>.gold_hubspot_companies` c
|
|
45
|
+
ON d.company_id = c.id
|
|
46
|
+
WHERE d.company_id IS NOT NULL;
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Check for fan-out (should be empty for valid many-to-one):
|
|
50
|
+
|
|
51
|
+
```sql
|
|
52
|
+
SELECT
|
|
53
|
+
d.deal_id,
|
|
54
|
+
COUNT(c.id) AS matched_companies
|
|
55
|
+
FROM `<dataset>.gold_hubspot_deals` d
|
|
56
|
+
LEFT JOIN `<dataset>.gold_hubspot_companies` c
|
|
57
|
+
ON d.company_id = c.id
|
|
58
|
+
WHERE d.company_id IS NOT NULL
|
|
59
|
+
GROUP BY d.deal_id
|
|
60
|
+
HAVING COUNT(c.id) > 1
|
|
61
|
+
LIMIT 20;
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## 3. Reverse One-to-Many Direction
|
|
67
|
+
|
|
68
|
+
Validate reverse aggregation:
|
|
69
|
+
|
|
70
|
+
```sql
|
|
71
|
+
SELECT
|
|
72
|
+
c.id AS company_id,
|
|
73
|
+
COUNT(d.deal_id) AS deal_count
|
|
74
|
+
FROM `<dataset>.gold_hubspot_companies` c
|
|
75
|
+
LEFT JOIN `<dataset>.gold_hubspot_deals` d
|
|
76
|
+
ON c.id = d.company_id
|
|
77
|
+
GROUP BY c.id
|
|
78
|
+
ORDER BY deal_count DESC
|
|
79
|
+
LIMIT 20;
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Cross-check sampled counts:
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
SELECT
|
|
86
|
+
company_id,
|
|
87
|
+
COUNT(*) AS expected_deal_count
|
|
88
|
+
FROM `<dataset>.gold_hubspot_deals`
|
|
89
|
+
WHERE company_id IN (<sample_company_ids>)
|
|
90
|
+
GROUP BY company_id;
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
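For each sampled company, `deal_count` from the join query must equal `expected_deal_count` from the cross-check query; a mismatch means the join is adding or dropping rows.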
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## 4. One-to-One Relationships
|
|
98
|
+
|
|
99
|
+
Validate uniqueness on both sides, then validate the join:
|
|
100
|
+
|
|
101
|
+
```sql
|
|
102
|
+
SELECT
|
|
103
|
+
COUNT(*) AS total_rows,
|
|
104
|
+
COUNT(r.<right_key>) AS matched_rows,
|
|
105
|
+
COUNT(*) - COUNT(r.<right_key>) AS unmatched_rows,
|
|
106
|
+
ROUND(100.0 * COUNT(r.<right_key>) / COUNT(*), 2) AS match_percentage
|
|
107
|
+
FROM `<dataset>.<left_model>` l
|
|
108
|
+
LEFT JOIN `<dataset>.<right_model>` r
|
|
109
|
+
ON l.<left_key> = r.<right_key>
|
|
110
|
+
WHERE l.<left_key> IS NOT NULL;
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Also validate the reverse direction (right to left). If either side has duplicate keys, the relationship is not one-to-one.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## 5. Many-to-Many Through Bridge
|
|
118
|
+
|
|
119
|
+
Validate both bridge edges. Example: `companies <-> deals through gold_companies_deals`.
|
|
120
|
+
|
|
121
|
+
Bridge to one parent:
|
|
122
|
+
|
|
123
|
+
```sql
|
|
124
|
+
SELECT
|
|
125
|
+
COUNT(*) AS total_bridge_rows,
|
|
126
|
+
COUNT(c.id) AS matched_companies,
|
|
127
|
+
COUNT(*) - COUNT(c.id) AS unmatched_companies,
|
|
128
|
+
ROUND(100.0 * COUNT(c.id) / COUNT(*), 2) AS match_percentage
|
|
129
|
+
FROM `<dataset>.gold_companies_deals` b
|
|
130
|
+
LEFT JOIN `<dataset>.gold_hubspot_companies` c
|
|
131
|
+
ON b.company_id = c.id
|
|
132
|
+
WHERE b.company_id IS NOT NULL;
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Run an analogous query for the other parent. Then validate the reverse aggregations:
|
|
136
|
+
|
|
137
|
+
```sql
|
|
138
|
+
SELECT
|
|
139
|
+
c.id AS company_id,
|
|
140
|
+
COUNT(b.deal_id) AS related_deals
|
|
141
|
+
FROM `<dataset>.gold_hubspot_companies` c
|
|
142
|
+
LEFT JOIN `<dataset>.gold_companies_deals` b
|
|
143
|
+
ON c.id = b.company_id
|
|
144
|
+
GROUP BY c.id
|
|
145
|
+
ORDER BY related_deals DESC
|
|
146
|
+
LIMIT 20;
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Run the same query with the roles swapped (deals -> bridge -> companies), then report the sampled counts.
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## 6. JSON / Array Relationships
|
|
154
|
+
|
|
155
|
+
Validate extracted keys:
|
|
156
|
+
|
|
157
|
+
```sql
|
|
158
|
+
WITH extracted AS (
|
|
159
|
+
SELECT DISTINCT
|
|
160
|
+
src.<source_pk> AS source_id,
|
|
161
|
+
extracted_id
|
|
162
|
+
FROM `<dataset>.<source_model>` src,
|
|
163
|
+
UNNEST(JSON_VALUE_ARRAY(src.<json_array_column>)) AS extracted_id
|
|
164
|
+
)
|
|
165
|
+
SELECT
|
|
166
|
+
COUNT(*) AS total_relationships,
|
|
167
|
+
COUNT(tgt.<target_pk>) AS matched_relationships,
|
|
168
|
+
COUNT(*) - COUNT(tgt.<target_pk>) AS unmatched_relationships,
|
|
169
|
+
ROUND(100.0 * COUNT(tgt.<target_pk>) / COUNT(*), 2) AS match_percentage
|
|
170
|
+
FROM extracted e
|
|
171
|
+
LEFT JOIN `<dataset>.<target_model>` tgt
|
|
172
|
+
ON e.extracted_id = tgt.<target_pk>;
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Sample matched values:
|
|
176
|
+
|
|
177
|
+
```sql
|
|
178
|
+
WITH extracted AS (
|
|
179
|
+
SELECT DISTINCT
|
|
180
|
+
src.<source_pk> AS source_id,
|
|
181
|
+
extracted_id
|
|
182
|
+
FROM `<dataset>.<source_model>` src,
|
|
183
|
+
UNNEST(JSON_VALUE_ARRAY(src.<json_array_column>)) AS extracted_id
|
|
184
|
+
)
|
|
185
|
+
SELECT
|
|
186
|
+
e.source_id, e.extracted_id, tgt.<target_pk>, tgt.<display_column>
|
|
187
|
+
FROM extracted e
|
|
188
|
+
LEFT JOIN `<dataset>.<target_model>` tgt
|
|
189
|
+
ON e.extracted_id = tgt.<target_pk>
|
|
190
|
+
LIMIT 10;
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## 7. Type Compatibility
|
|
196
|
+
|
|
197
|
+
```sql
|
|
198
|
+
SELECT column_name, data_type
|
|
199
|
+
FROM `<dataset>.INFORMATION_SCHEMA.COLUMNS`
|
|
200
|
+
WHERE table_name IN ('<source_model>', '<target_model>')
|
|
201
|
+
AND column_name IN ('<foreign_key>', '<target_pk>');
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
If types differ:
|
|
205
|
+
|
|
206
|
+
1. Report the mismatch.
|
|
207
|
+
2. Prefer fixing type alignment in the dbt model or approved support model.
|
|
208
|
+
3. Only cast in Cube join SQL when necessary.
|
|
209
|
+
4. Prefer casting the foreign-key side to match the primary-key side.
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: explore-lakehouse
|
|
3
|
-
description:
|
|
3
|
+
description: >
|
|
4
|
+
Inspect the RevOS BigQuery lakehouse: list datasets and tables, introspect table schemas
|
|
5
|
+
and column types, preview sample rows, assess data layers (bronze/silver/gold), and check
|
|
6
|
+
data completeness and null rates. Required companion skill for create-dbt-transformations
|
|
7
|
+
and create-semantic-model — load before generating dbt models or semantic overlays to
|
|
8
|
+
introspect warehouse columns and types. Use when asked to: explore the lakehouse, list
|
|
9
|
+
BigQuery tables, inspect a table schema, preview data, check raw source tables, assess data
|
|
10
|
+
quality, check null rates, understand available data, or perform BigQuery schema introspection.
|
|
4
11
|
---
|
|
5
12
|
|
|
6
13
|
# Explore Lakehouse
|
|
@@ -3,6 +3,10 @@ import { CreateOverlayDto, OverlaysResponseDto } from "@revos/api-client";
|
|
|
3
3
|
//#region src/core/types.d.ts
|
|
4
4
|
type CubeOverlay = OverlaysResponseDto["data"][number];
|
|
5
5
|
type CubeDefinition = CubeOverlay["data"];
|
|
6
|
+
type OverlayFileData = CubeDefinition & {
|
|
7
|
+
name?: string;
|
|
8
|
+
description?: string;
|
|
9
|
+
};
|
|
6
10
|
type OverlayFile = CreateOverlayDto;
|
|
7
11
|
interface Config {
|
|
8
12
|
apiUrl: string;
|
|
@@ -62,4 +66,4 @@ interface DiffResult {
|
|
|
62
66
|
entries: DiffEntry[];
|
|
63
67
|
}
|
|
64
68
|
//#endregion
|
|
65
|
-
export { DiffEntry as a,
|
|
69
|
+
export { DiffEntry as a, OverlayFileData as c, PushResult as d, StatusResult as f, DiffChange as i, OverlayStatusInfo as l, CubeDefinition as n, DiffResult as o, SyncStatus as p, CubeOverlay as r, OverlayFile as s, Config as t, PullResult as u };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@revos/cli",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "RevOS CLI for managing RevOS platform resources",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -46,6 +46,7 @@
|
|
|
46
46
|
"@oclif/table": "^0.5.4",
|
|
47
47
|
"chalk": "^4.1.2",
|
|
48
48
|
"open": "^10.1.0",
|
|
49
|
+
"yaml": "^2.8.3",
|
|
49
50
|
"@revos/api-client": "0.1.0"
|
|
50
51
|
},
|
|
51
52
|
"devDependencies": {
|