manifold-cli 0.0.15 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/manifold/api/schema_generator.rb +26 -21
- data/lib/manifold/api/workspace.rb +120 -36
- data/lib/manifold/cli.rb +1 -1
- data/lib/manifold/templates/workspace_template.yml +26 -15
- data/lib/manifold/terraform/workspace_configuration.rb +265 -49
- data/lib/manifold/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 61582d1f6df6c8a674f9ef9b2374ae231fc98da47435ea5893d5077cb4dad34e
|
4
|
+
data.tar.gz: ab91d3938b7a382ac003a467cd04d19cc6059804fe21e099e7e2aa60675eed3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5cc06d2a611b1b29edbb466ab20cf8b82d4b99d609f57b71bd5e7f6098e7fcbd636c46a662306b8318c429148fbba3b4b6d6ddc3c603cddcb53ff81c9fe50f4c
|
7
|
+
data.tar.gz: b8e44688d8e47628dc718bf992eb28114fde80a7cd60975e7a49cf91ce05fdf3ed2b6bc349c73332801ed5934b0a56a6f71ca100ad4b02df763c2f67cd2a3f8b
|
data/lib/manifold/api/schema_generator.rb
CHANGED
@@ -4,8 +4,6 @@ module Manifold
|
|
4
4
|
module API
|
5
5
|
# Handles schema generation for Manifold tables
|
6
6
|
class SchemaGenerator
|
7
|
-
VALID_OPERATORS = %w[AND OR NOT NAND NOR XOR XNOR].freeze
|
8
|
-
|
9
7
|
def initialize(dimensions_fields, manifold_yaml)
|
10
8
|
@dimensions_fields = dimensions_fields
|
11
9
|
@manifold_yaml = manifold_yaml
|
@@ -33,39 +31,52 @@ module Manifold
|
|
33
31
|
private
|
34
32
|
|
35
33
|
def metrics_fields
|
36
|
-
return [] unless @manifold_yaml["
|
34
|
+
return [] unless @manifold_yaml["metrics"]
|
35
|
+
|
36
|
+
@manifold_yaml["metrics"].map do |group_name, group_config|
|
37
|
+
{
|
38
|
+
"name" => group_name,
|
39
|
+
"type" => "RECORD",
|
40
|
+
"mode" => "NULLABLE",
|
41
|
+
"fields" => group_metrics_fields(group_config)
|
42
|
+
}
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def group_metrics_fields(group_config)
|
47
|
+
return [] unless group_config["breakouts"] && group_config["aggregations"]
|
37
48
|
|
38
|
-
|
49
|
+
group_config["breakouts"].map do |breakout_name, _breakout_config|
|
39
50
|
{
|
40
|
-
"name" =>
|
51
|
+
"name" => breakout_name,
|
41
52
|
"type" => "RECORD",
|
42
53
|
"mode" => "NULLABLE",
|
43
|
-
"fields" =>
|
54
|
+
"fields" => breakout_metrics_fields(group_config)
|
44
55
|
}
|
45
56
|
end
|
46
57
|
end
|
47
58
|
|
48
|
-
def
|
59
|
+
def breakout_metrics_fields(group_config)
|
49
60
|
[
|
50
|
-
*countif_fields,
|
51
|
-
*sumif_fields
|
61
|
+
*countif_fields(group_config),
|
62
|
+
*sumif_fields(group_config)
|
52
63
|
]
|
53
64
|
end
|
54
65
|
|
55
|
-
def countif_fields
|
56
|
-
return [] unless
|
66
|
+
def countif_fields(group_config)
|
67
|
+
return [] unless group_config.dig("aggregations", "countif")
|
57
68
|
|
58
69
|
[{
|
59
|
-
"name" =>
|
70
|
+
"name" => group_config["aggregations"]["countif"],
|
60
71
|
"type" => "INTEGER",
|
61
72
|
"mode" => "NULLABLE"
|
62
73
|
}]
|
63
74
|
end
|
64
75
|
|
65
|
-
def sumif_fields
|
66
|
-
return [] unless
|
76
|
+
def sumif_fields(group_config)
|
77
|
+
return [] unless group_config.dig("aggregations", "sumif")
|
67
78
|
|
68
|
-
|
79
|
+
group_config["aggregations"]["sumif"].keys.map do |metric_name|
|
69
80
|
{
|
70
81
|
"name" => metric_name,
|
71
82
|
"type" => "INTEGER",
|
@@ -73,12 +84,6 @@ module Manifold
|
|
73
84
|
}
|
74
85
|
end
|
75
86
|
end
|
76
|
-
|
77
|
-
def validate_operator!(operator)
|
78
|
-
return if VALID_OPERATORS.include?(operator)
|
79
|
-
|
80
|
-
raise ArgumentError, "Invalid operator: #{operator}. Valid operators are: #{VALID_OPERATORS.join(", ")}"
|
81
|
-
end
|
82
87
|
end
|
83
88
|
end
|
84
89
|
end
|
data/lib/manifold/api/workspace.rb
CHANGED
@@ -4,6 +4,8 @@ module Manifold
|
|
4
4
|
module API
|
5
5
|
# Handles terraform configuration generation
|
6
6
|
class TerraformGenerator
|
7
|
+
attr_accessor :manifold_config
|
8
|
+
|
7
9
|
def initialize(name, vectors, vector_service, manifold_yaml)
|
8
10
|
@name = name
|
9
11
|
@vectors = vectors
|
@@ -17,11 +19,91 @@ module Manifold
|
|
17
19
|
vector_config = @vector_service.load_vector_config(vector)
|
18
20
|
config.add_vector(vector_config)
|
19
21
|
end
|
20
|
-
config.
|
22
|
+
config.dimensions_config = @manifold_yaml["dimensions"]&.fetch("merge", nil) if @manifold_yaml["dimensions"]
|
23
|
+
config.manifold_config = @manifold_yaml
|
21
24
|
config.write(path)
|
22
25
|
end
|
23
26
|
end
|
24
27
|
|
28
|
+
# Handles SQL generation for manifold workspaces
|
29
|
+
class SqlGenerator
|
30
|
+
def initialize(name, manifold_yaml)
|
31
|
+
@name = name
|
32
|
+
@manifold_yaml = manifold_yaml
|
33
|
+
end
|
34
|
+
|
35
|
+
def generate_dimensions_merge_sql(source_sql)
|
36
|
+
return unless valid_dimensions_config?
|
37
|
+
|
38
|
+
sql_builder = Terraform::SQLBuilder.new(@name, @manifold_yaml)
|
39
|
+
sql_builder.build_dimensions_merge_sql(source_sql)
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def valid_dimensions_config?
|
45
|
+
return false unless @manifold_yaml
|
46
|
+
|
47
|
+
!@manifold_yaml["dimensions"]&.dig("merge", "source").nil?
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Handles schema file generation for manifold workspaces
|
52
|
+
class SchemaWriter
|
53
|
+
def initialize(name, vectors, vector_service, manifold_yaml, logger)
|
54
|
+
@name = name
|
55
|
+
@vectors = vectors
|
56
|
+
@vector_service = vector_service
|
57
|
+
@manifold_yaml = manifold_yaml
|
58
|
+
@logger = logger
|
59
|
+
end
|
60
|
+
|
61
|
+
def write_schemas(tables_directory)
|
62
|
+
tables_directory.mkpath
|
63
|
+
write_dimensions_schema(tables_directory)
|
64
|
+
write_manifold_schema(tables_directory)
|
65
|
+
end
|
66
|
+
|
67
|
+
private
|
68
|
+
|
69
|
+
def write_dimensions_schema(tables_directory)
|
70
|
+
dimensions_path = tables_directory.join("dimensions.json")
|
71
|
+
dimensions_path.write(dimensions_schema_json.concat("\n"))
|
72
|
+
end
|
73
|
+
|
74
|
+
def write_manifold_schema(tables_directory)
|
75
|
+
manifold_path = tables_directory.join("manifold.json")
|
76
|
+
manifold_path.write(manifold_schema_json.concat("\n"))
|
77
|
+
end
|
78
|
+
|
79
|
+
def schema_generator
|
80
|
+
@schema_generator ||= SchemaGenerator.new(dimensions_fields, @manifold_yaml)
|
81
|
+
end
|
82
|
+
|
83
|
+
def manifold_schema
|
84
|
+
schema_generator.manifold_schema
|
85
|
+
end
|
86
|
+
|
87
|
+
def dimensions_schema
|
88
|
+
schema_generator.dimensions_schema
|
89
|
+
end
|
90
|
+
|
91
|
+
def dimensions_fields
|
92
|
+
@dimensions_fields ||= @vectors.filter_map do |vector|
|
93
|
+
@logger.info("Loading vector schema for '#{vector}'.")
|
94
|
+
@vector_service.load_vector_schema(vector)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def dimensions_schema_json
|
99
|
+
JSON.pretty_generate(dimensions_schema)
|
100
|
+
end
|
101
|
+
|
102
|
+
def manifold_schema_json
|
103
|
+
JSON.pretty_generate(manifold_schema)
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
25
107
|
# Encapsulates a single manifold.
|
26
108
|
class Workspace
|
27
109
|
attr_reader :name, :template_path, :logger
|
@@ -49,13 +131,13 @@ module Manifold
|
|
49
131
|
def generate(with_terraform: false)
|
50
132
|
return nil unless manifold_exists? && any_vectors?
|
51
133
|
|
52
|
-
|
53
|
-
generate_dimensions
|
54
|
-
generate_manifold
|
134
|
+
generate_schemas
|
55
135
|
logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
|
56
136
|
|
57
137
|
return unless with_terraform
|
58
138
|
|
139
|
+
write_manifold_merge_sql
|
140
|
+
write_dimensions_merge_sql
|
59
141
|
generate_terraform
|
60
142
|
logger.info("Generated Terraform configuration for workspace '#{name}'.")
|
61
143
|
end
|
@@ -86,53 +168,58 @@ module Manifold
|
|
86
168
|
directory.join("main.tf.json")
|
87
169
|
end
|
88
170
|
|
89
|
-
|
171
|
+
def write_manifold_merge_sql
|
172
|
+
return unless manifold_file
|
90
173
|
|
91
|
-
|
92
|
-
|
93
|
-
|
174
|
+
sql_builder = Terraform::SQLBuilder.new(name, manifold_yaml)
|
175
|
+
metrics_builder = Terraform::MetricsBuilder.new(manifold_yaml)
|
176
|
+
sql = sql_builder.build_manifold_merge_sql(metrics_builder) do
|
177
|
+
metrics_builder.build_metrics_struct
|
178
|
+
end
|
94
179
|
|
95
|
-
|
96
|
-
@manifold_yaml ||= YAML.safe_load_file(manifold_path)
|
180
|
+
routines_directory.join("merge_manifold.sql").write(sql)
|
97
181
|
end
|
98
182
|
|
99
|
-
def
|
100
|
-
|
101
|
-
end
|
183
|
+
def write_dimensions_merge_sql
|
184
|
+
return unless dimensions_merge_source_exists?
|
102
185
|
|
103
|
-
|
104
|
-
|
186
|
+
sql = generate_dimensions_merge_sql
|
187
|
+
return unless sql
|
188
|
+
|
189
|
+
write_dimensions_merge_sql_file(sql)
|
105
190
|
end
|
106
191
|
|
107
|
-
def
|
108
|
-
|
192
|
+
def dimensions_merge_source_exists?
|
193
|
+
manifold_yaml["dimensions"]&.dig("merge", "source")
|
109
194
|
end
|
110
195
|
|
111
|
-
def
|
112
|
-
|
196
|
+
def generate_dimensions_merge_sql
|
197
|
+
source_sql = File.read(Pathname.pwd.join(manifold_yaml["dimensions"]["merge"]["source"]))
|
198
|
+
SqlGenerator.new(name, manifold_yaml).generate_dimensions_merge_sql(source_sql)
|
113
199
|
end
|
114
200
|
|
115
|
-
def
|
116
|
-
|
201
|
+
def write_dimensions_merge_sql_file(sql)
|
202
|
+
routines_directory.mkpath
|
203
|
+
dimensions_merge_sql_path.write(sql)
|
117
204
|
end
|
118
205
|
|
119
|
-
def
|
120
|
-
|
206
|
+
def dimensions_merge_sql_path
|
207
|
+
routines_directory.join("merge_dimensions.sql")
|
121
208
|
end
|
122
209
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
end
|
210
|
+
private
|
211
|
+
|
212
|
+
def directory
|
213
|
+
Pathname.pwd.join("workspaces", name)
|
128
214
|
end
|
129
215
|
|
130
|
-
def
|
131
|
-
|
216
|
+
def manifold_yaml
|
217
|
+
@manifold_yaml ||= YAML.safe_load_file(manifold_path)
|
132
218
|
end
|
133
219
|
|
134
|
-
def
|
135
|
-
|
220
|
+
def generate_schemas
|
221
|
+
SchemaWriter.new(name, vectors, @vector_service, manifold_yaml, logger)
|
222
|
+
.write_schemas(tables_directory)
|
136
223
|
end
|
137
224
|
|
138
225
|
def any_vectors?
|
@@ -145,12 +232,9 @@ module Manifold
|
|
145
232
|
|
146
233
|
def generate_terraform
|
147
234
|
terraform_generator = TerraformGenerator.new(name, vectors, @vector_service, manifold_yaml)
|
235
|
+
terraform_generator.manifold_config = manifold_yaml
|
148
236
|
terraform_generator.generate(terraform_main_path)
|
149
237
|
end
|
150
|
-
|
151
|
-
def manifold_schema_json
|
152
|
-
JSON.pretty_generate(manifold_schema)
|
153
|
-
end
|
154
238
|
end
|
155
239
|
end
|
156
240
|
end
|
data/lib/manifold/cli.rb
CHANGED
@@ -4,7 +4,7 @@ module Manifold
|
|
4
4
|
# CLI provides command line interface functionality
|
5
5
|
# for creating and managing umbrella projects for data management.
|
6
6
|
class CLI < Thor
|
7
|
-
attr_accessor :logger
|
7
|
+
attr_accessor :logger
|
8
8
|
|
9
9
|
def initialize(*args, logger: Logger.new($stdout))
|
10
10
|
super(*args)
|
data/lib/manifold/templates/workspace_template.yml
CHANGED
@@ -12,20 +12,31 @@ timestamp:
|
|
12
12
|
interval: HOUR
|
13
13
|
field: timestamp
|
14
14
|
|
15
|
-
contexts:
|
16
|
-
paid: IS_PAID(context.location)
|
17
|
-
organic: IS_ORGANIC(context.location)
|
18
|
-
paidOrganic:
|
19
|
-
fields:
|
20
|
-
- paid
|
21
|
-
- organic
|
22
|
-
operator: AND
|
23
|
-
|
24
15
|
metrics:
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
16
|
+
renders:
|
17
|
+
breakouts:
|
18
|
+
paid: IS_PAID(context.location)
|
19
|
+
organic: IS_ORGANIC(context.location)
|
20
|
+
|
21
|
+
aggregations:
|
22
|
+
countif: renderCount
|
23
|
+
sumif:
|
24
|
+
sequenceSum:
|
25
|
+
field: context.sequence
|
26
|
+
|
27
|
+
source: my_project.my_dataset.my_table
|
28
|
+
filter: timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY)
|
29
|
+
|
30
|
+
taps:
|
31
|
+
breakouts:
|
32
|
+
paid: IS_PAID(context.location)
|
33
|
+
organic: IS_ORGANIC(context.location)
|
34
|
+
|
35
|
+
aggregations:
|
36
|
+
countif: tapCount
|
37
|
+
sumif:
|
38
|
+
sequenceSum:
|
39
|
+
field: context.sequence
|
29
40
|
|
30
|
-
source: my_project.my_dataset.my_table
|
31
|
-
filter: timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY)
|
41
|
+
source: my_project.my_dataset.my_table
|
42
|
+
filter: timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY)
|
data/lib/manifold/terraform/workspace_configuration.rb
CHANGED
@@ -2,29 +2,274 @@
|
|
2
2
|
|
3
3
|
module Manifold
|
4
4
|
module Terraform
|
5
|
+
# Handles building metrics SQL for manifold routines
|
6
|
+
class MetricsBuilder
|
7
|
+
def initialize(manifold_config)
|
8
|
+
@manifold_config = manifold_config
|
9
|
+
end
|
10
|
+
|
11
|
+
def build_metrics_struct
|
12
|
+
return "" unless @manifold_config["metrics"]
|
13
|
+
|
14
|
+
metric_groups = @manifold_config["metrics"].map do |group_name, group_config|
|
15
|
+
build_group_struct(group_name, group_config)
|
16
|
+
end
|
17
|
+
|
18
|
+
metric_groups.join(",\n")
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def build_group_struct(group_name, group_config)
|
24
|
+
return "" unless valid_group_config?(group_config)
|
25
|
+
|
26
|
+
breakout_structs = build_breakout_structs(group_config)
|
27
|
+
return "" if breakout_structs.empty?
|
28
|
+
|
29
|
+
"\tSTRUCT(\n#{breakout_structs.join(",\n")}\n\t) AS #{group_name}"
|
30
|
+
end
|
31
|
+
|
32
|
+
def valid_group_config?(group_config)
|
33
|
+
group_config["breakouts"] &&
|
34
|
+
group_config["aggregations"] &&
|
35
|
+
!group_config["breakouts"].empty? &&
|
36
|
+
!group_config["aggregations"].empty?
|
37
|
+
end
|
38
|
+
|
39
|
+
def build_breakout_structs(group_config)
|
40
|
+
group_config["breakouts"].map do |name, config|
|
41
|
+
build_breakout_struct(name, config, group_config)
|
42
|
+
end.compact
|
43
|
+
end
|
44
|
+
|
45
|
+
def build_breakout_struct(name, config, group_config)
|
46
|
+
condition = build_breakout_condition(name, config, group_config)
|
47
|
+
metrics = build_breakout_metrics(group_config, condition)
|
48
|
+
return if metrics.empty?
|
49
|
+
|
50
|
+
"\t\tSTRUCT(\n\t\t\t#{metrics}\n\t\t) AS #{name}"
|
51
|
+
end
|
52
|
+
|
53
|
+
def build_breakout_metrics(group_config, condition)
|
54
|
+
metrics = []
|
55
|
+
add_count_metrics(metrics, group_config, condition)
|
56
|
+
add_sum_metrics(metrics, group_config, condition)
|
57
|
+
metrics.join(",\n\t\t\t")
|
58
|
+
end
|
59
|
+
|
60
|
+
def add_count_metrics(metrics, group_config, condition)
|
61
|
+
return unless group_config.dig("aggregations", "countif")
|
62
|
+
|
63
|
+
metrics << "COUNTIF(#{condition}) AS #{group_config["aggregations"]["countif"]}"
|
64
|
+
end
|
65
|
+
|
66
|
+
def add_sum_metrics(metrics, group_config, condition)
|
67
|
+
group_config.dig("aggregations", "sumif")&.each do |name, config|
|
68
|
+
metrics << "SUM(IF(#{condition}, #{config["field"]}, 0)) AS #{name}"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def build_breakout_condition(_name, config, group_config)
|
73
|
+
return config unless config.is_a?(Hash)
|
74
|
+
|
75
|
+
operator = config["operator"]
|
76
|
+
fields = config["fields"]
|
77
|
+
build_operator_condition(operator, fields, group_config)
|
78
|
+
end
|
79
|
+
|
80
|
+
def build_operator_condition(operator, fields, group_config)
|
81
|
+
conditions = fields.map { |f| group_config["breakouts"][f] }
|
82
|
+
case operator
|
83
|
+
when "AND", "OR" then join_conditions(conditions, operator)
|
84
|
+
when "NOT" then negate_condition(conditions.first)
|
85
|
+
when "NAND", "NOR" then negate_joined_conditions(conditions, operator[1..])
|
86
|
+
when "XOR" then build_xor_condition(conditions)
|
87
|
+
when "XNOR" then build_xnor_condition(conditions)
|
88
|
+
else config
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def join_conditions(conditions, operator)
|
93
|
+
conditions.join(" #{operator} ")
|
94
|
+
end
|
95
|
+
|
96
|
+
def negate_condition(condition)
|
97
|
+
"NOT (#{condition})"
|
98
|
+
end
|
99
|
+
|
100
|
+
def negate_joined_conditions(conditions, operator)
|
101
|
+
"NOT (#{join_conditions(conditions, operator)})"
|
102
|
+
end
|
103
|
+
|
104
|
+
def build_xor_condition(conditions)
|
105
|
+
"(#{conditions[0]} AND NOT #{conditions[1]}) OR (NOT #{conditions[0]} AND #{conditions[1]})"
|
106
|
+
end
|
107
|
+
|
108
|
+
def build_xnor_condition(conditions)
|
109
|
+
"(#{conditions[0]} AND #{conditions[1]}) OR (NOT #{conditions[0]} AND NOT #{conditions[1]})"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# Handles building SQL for manifold routines
|
114
|
+
class SQLBuilder
|
115
|
+
def initialize(name, manifold_config)
|
116
|
+
@name = name
|
117
|
+
@manifold_config = manifold_config
|
118
|
+
end
|
119
|
+
|
120
|
+
def build_manifold_merge_sql(_metrics_builder, &)
|
121
|
+
return "" unless valid_config?
|
122
|
+
|
123
|
+
<<~SQL
|
124
|
+
MERGE #{@name}.Manifold AS target USING (
|
125
|
+
#{build_metrics_cte(&)}
|
126
|
+
#{build_final_select}
|
127
|
+
) AS source
|
128
|
+
ON source.id = target.id AND source.timestamp = target.timestamp
|
129
|
+
#{build_merge_actions}
|
130
|
+
SQL
|
131
|
+
end
|
132
|
+
|
133
|
+
def build_dimensions_merge_sql(source_sql)
|
134
|
+
<<~SQL
|
135
|
+
MERGE #{@name}.Dimensions AS TARGET
|
136
|
+
USING (
|
137
|
+
#{source_sql}
|
138
|
+
) AS source
|
139
|
+
ON source.id = target.id
|
140
|
+
WHEN MATCHED THEN UPDATE SET target.dimensions = source.dimensions
|
141
|
+
WHEN NOT MATCHED THEN INSERT ROW;
|
142
|
+
SQL
|
143
|
+
end
|
144
|
+
|
145
|
+
private
|
146
|
+
|
147
|
+
def valid_config?
|
148
|
+
source_table && timestamp_field && @manifold_config["metrics"]
|
149
|
+
end
|
150
|
+
|
151
|
+
def source_table
|
152
|
+
first_group = @manifold_config["metrics"]&.values&.first
|
153
|
+
first_group&.dig("source")
|
154
|
+
end
|
155
|
+
|
156
|
+
def interval
|
157
|
+
@manifold_config&.dig("timestamp", "interval") || "DAY"
|
158
|
+
end
|
159
|
+
|
160
|
+
def where_clause
|
161
|
+
first_group = @manifold_config["metrics"]&.values&.first
|
162
|
+
return "" unless first_group&.dig("filter")
|
163
|
+
|
164
|
+
"WHERE #{first_group["filter"]}"
|
165
|
+
end
|
166
|
+
|
167
|
+
def timestamp_field
|
168
|
+
@manifold_config&.dig("timestamp", "field")
|
169
|
+
end
|
170
|
+
|
171
|
+
def build_metrics_cte(&)
|
172
|
+
<<~SQL
|
173
|
+
WITH Metrics AS (
|
174
|
+
#{build_metrics_select(&)}
|
175
|
+
)
|
176
|
+
SQL
|
177
|
+
end
|
178
|
+
|
179
|
+
def build_metrics_select(&block)
|
180
|
+
<<~SQL
|
181
|
+
SELECT
|
182
|
+
id,
|
183
|
+
TIMESTAMP_TRUNC(#{timestamp_field}, #{interval}) timestamp,
|
184
|
+
STRUCT(
|
185
|
+
#{block.call}
|
186
|
+
) AS metrics
|
187
|
+
FROM #{source_table}
|
188
|
+
#{where_clause}
|
189
|
+
GROUP BY 1, 2
|
190
|
+
SQL
|
191
|
+
end
|
192
|
+
|
193
|
+
def build_final_select
|
194
|
+
<<~SQL
|
195
|
+
SELECT
|
196
|
+
id,
|
197
|
+
timestamp,
|
198
|
+
Dimensions.dimensions,
|
199
|
+
Metrics.metrics
|
200
|
+
FROM Metrics
|
201
|
+
LEFT JOIN #{@name}.Dimensions USING (id)
|
202
|
+
SQL
|
203
|
+
end
|
204
|
+
|
205
|
+
def build_merge_actions
|
206
|
+
<<~SQL
|
207
|
+
WHEN MATCHED THEN
|
208
|
+
UPDATE SET
|
209
|
+
metrics = source.metrics,
|
210
|
+
dimensions = source.dimensions
|
211
|
+
WHEN NOT MATCHED THEN
|
212
|
+
INSERT ROW;
|
213
|
+
SQL
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
# Handles building table configurations
|
218
|
+
class TableConfigBuilder
|
219
|
+
def initialize(name)
|
220
|
+
@name = name
|
221
|
+
end
|
222
|
+
|
223
|
+
def build_table_configs
|
224
|
+
{
|
225
|
+
"dimensions" => dimensions_table_config,
|
226
|
+
"manifold" => manifold_table_config
|
227
|
+
}
|
228
|
+
end
|
229
|
+
|
230
|
+
private
|
231
|
+
|
232
|
+
def dimensions_table_config
|
233
|
+
build_table_config("Dimensions")
|
234
|
+
end
|
235
|
+
|
236
|
+
def manifold_table_config
|
237
|
+
build_table_config("Manifold")
|
238
|
+
end
|
239
|
+
|
240
|
+
def build_table_config(table_id)
|
241
|
+
{
|
242
|
+
"dataset_id" => @name,
|
243
|
+
"project" => "${var.project_id}",
|
244
|
+
"table_id" => table_id,
|
245
|
+
"schema" => "${file(\"${path.module}/tables/#{table_id.downcase}.json\")}",
|
246
|
+
"depends_on" => ["google_bigquery_dataset.#{@name}"]
|
247
|
+
}
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
5
251
|
# Represents a Terraform configuration for a Manifold workspace.
|
6
252
|
class WorkspaceConfiguration < Configuration
|
7
253
|
attr_reader :name
|
254
|
+
attr_writer :dimensions_config, :manifold_config
|
8
255
|
|
9
256
|
def initialize(name)
|
10
257
|
super()
|
11
258
|
@name = name
|
12
259
|
@vectors = []
|
13
|
-
@
|
260
|
+
@dimensions_config = nil
|
14
261
|
end
|
15
262
|
|
16
263
|
def add_vector(vector_config)
|
17
264
|
@vectors << vector_config
|
18
265
|
end
|
19
266
|
|
20
|
-
attr_writer :merge_config
|
21
|
-
|
22
267
|
def as_json
|
23
268
|
{
|
24
269
|
"variable" => variables_block,
|
25
270
|
"resource" => {
|
26
271
|
"google_bigquery_dataset" => dataset_config,
|
27
|
-
"google_bigquery_table" =>
|
272
|
+
"google_bigquery_table" => TableConfigBuilder.new(name).build_table_configs,
|
28
273
|
"google_bigquery_routine" => routine_config
|
29
274
|
}.compact
|
30
275
|
}
|
@@ -51,69 +296,40 @@ module Manifold
|
|
51
296
|
}
|
52
297
|
end
|
53
298
|
|
54
|
-
def
|
55
|
-
{
|
56
|
-
"
|
57
|
-
"
|
58
|
-
}
|
59
|
-
end
|
299
|
+
def routine_config
|
300
|
+
routines = {
|
301
|
+
"merge_dimensions" => dimensions_routine_attributes
|
302
|
+
# "merge_manifold" => manifold_routine_attributes
|
303
|
+
}.compact
|
60
304
|
|
61
|
-
|
62
|
-
{
|
63
|
-
"dataset_id" => name,
|
64
|
-
"project" => "${var.project_id}",
|
65
|
-
"table_id" => "Dimensions",
|
66
|
-
"schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
|
67
|
-
"depends_on" => ["google_bigquery_dataset.#{name}"]
|
68
|
-
}
|
305
|
+
routines.empty? ? nil : routines
|
69
306
|
end
|
70
307
|
|
71
|
-
def
|
308
|
+
def dimensions_routine_attributes
|
309
|
+
return nil if @vectors.empty? || @dimensions_config.nil?
|
310
|
+
|
72
311
|
{
|
73
312
|
"dataset_id" => name,
|
74
313
|
"project" => "${var.project_id}",
|
75
|
-
"
|
76
|
-
"
|
314
|
+
"routine_id" => "merge_dimensions",
|
315
|
+
"routine_type" => "PROCEDURE",
|
316
|
+
"language" => "SQL",
|
317
|
+
"definition_body" => "${file(\"${path.module}/routines/merge_dimensions.sql\")}",
|
77
318
|
"depends_on" => ["google_bigquery_dataset.#{name}"]
|
78
319
|
}
|
79
320
|
end
|
80
321
|
|
81
|
-
def
|
82
|
-
return nil if @vectors.empty? || @merge_config.nil?
|
83
|
-
|
84
|
-
{
|
85
|
-
"merge_dimensions" => routine_attributes
|
86
|
-
}
|
87
|
-
end
|
88
|
-
|
89
|
-
def routine_attributes
|
322
|
+
def manifold_routine_attributes
|
90
323
|
{
|
91
324
|
"dataset_id" => name,
|
92
325
|
"project" => "${var.project_id}",
|
93
|
-
"routine_id" => "
|
326
|
+
"routine_id" => "merge_manifold",
|
94
327
|
"routine_type" => "PROCEDURE",
|
95
328
|
"language" => "SQL",
|
96
|
-
"definition_body" =>
|
329
|
+
"definition_body" => "${file(\"${path.module}/routines/merge_manifold.sql\")}",
|
97
330
|
"depends_on" => ["google_bigquery_dataset.#{name}"]
|
98
331
|
}
|
99
332
|
end
|
100
|
-
|
101
|
-
def merge_routine_definition
|
102
|
-
source_sql = read_source_sql(@merge_config["source"])
|
103
|
-
<<~SQL
|
104
|
-
MERGE #{name}.Dimensions AS TARGET
|
105
|
-
USING (
|
106
|
-
#{source_sql}
|
107
|
-
) AS source
|
108
|
-
ON source.id = target.id
|
109
|
-
WHEN MATCHED THEN UPDATE SET target.dimensions = source.dimensions
|
110
|
-
WHEN NOT MATCHED THEN INSERT ROW;
|
111
|
-
SQL
|
112
|
-
end
|
113
|
-
|
114
|
-
def read_source_sql(source_path)
|
115
|
-
File.read(Pathname.pwd.join(source_path))
|
116
|
-
end
|
117
333
|
end
|
118
334
|
end
|
119
335
|
end
|
data/lib/manifold/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: manifold-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.15
|
4
|
+
version: 0.0.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- claytongentry
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-02-
|
10
|
+
date: 2025-02-18 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: thor
|