manifold-cli 0.0.15 → 0.0.16

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 465e6236b9114a9a9170502e55646ae094a392ee25bb9ce29af81046fdb1b386
4
- data.tar.gz: b8dc973ee3caf57fcde8919b93580c2278850b90aaa1df82425f9a819fc66a2d
3
+ metadata.gz: 38a614204bb12682a02b29ea889a9f44a6aab5af58b746322ba607252f9ff272
4
+ data.tar.gz: 1ee65656d387eabc11fe6eb9effcf6119ec28210d32efc7bf0b8ae57ea37c43b
5
5
  SHA512:
6
- metadata.gz: 69d18cf361bdbf945685aab0264c3222c7c692e45141e3e10352238523f180df0b35f97689bff3bb71712897ddb52fb1f037c8a951762fa1818d6d28a560944b
7
- data.tar.gz: f8e29a241c6f76a6632e5e47c421ac03d0736c1af3d7e8e2a374026d30f76aded46c02359aaa1a0d29b720c9bdc65355ed4e44a761cfe1af549d730988982e77
6
+ metadata.gz: 762a4575c8423177ccbd69622da95d034aa311f51733f4f6d123199ec35af390acee194ce543c907f127939ed6dae8c15de97d11183b2bb508b0b3a8f1195e02
7
+ data.tar.gz: 359ae735a6be1fec84b46a0410395feaaef0b12fee3df69f887cfc7723b57d58341dc1684ea93f401b91b5e7510e1bb085175af2bc193578471f72f4b85c955a
@@ -4,6 +4,8 @@ module Manifold
4
4
  module API
5
5
  # Handles terraform configuration generation
6
6
  class TerraformGenerator
7
+ attr_accessor :manifold_config
8
+
7
9
  def initialize(name, vectors, vector_service, manifold_yaml)
8
10
  @name = name
9
11
  @vectors = vectors
@@ -18,6 +20,7 @@ module Manifold
18
20
  config.add_vector(vector_config)
19
21
  end
20
22
  config.merge_config = @manifold_yaml["dimensions"]&.fetch("merge", nil) if @manifold_yaml["dimensions"]
23
+ config.manifold_config = @manifold_yaml
21
24
  config.write(path)
22
25
  end
23
26
  end
@@ -145,6 +148,7 @@ module Manifold
145
148
 
146
149
  def generate_terraform
147
150
  terraform_generator = TerraformGenerator.new(name, vectors, @vector_service, manifold_yaml)
151
+ terraform_generator.manifold_config = manifold_yaml
148
152
  terraform_generator.generate(terraform_main_path)
149
153
  end
150
154
 
@@ -2,9 +2,222 @@
2
2
 
3
3
  module Manifold
4
4
  module Terraform
5
+ # Handles building metrics SQL for manifold routines
6
+ class MetricsBuilder
7
+ def initialize(manifold_config)
8
+ @manifold_config = manifold_config
9
+ end
10
+
11
+ def build_metrics_struct
12
+ return "" unless @manifold_config&.dig("contexts") && @manifold_config&.dig("metrics")
13
+
14
+ context_structs = @manifold_config["contexts"].map do |name, config|
15
+ condition = build_context_condition(name, config)
16
+ metrics = build_context_metrics(condition)
17
+ "STRUCT(#{metrics}) AS #{name}"
18
+ end
19
+
20
+ context_structs.join(",\n")
21
+ end
22
+
23
+ private
24
+
25
+ def build_context_metrics(condition)
26
+ metrics = []
27
+ add_count_metrics(metrics, condition)
28
+ add_sum_metrics(metrics, condition)
29
+ metrics.join(",\n")
30
+ end
31
+
32
+ def add_count_metrics(metrics, condition)
33
+ return unless @manifold_config.dig("metrics", "countif")
34
+
35
+ metrics << "COUNTIF(#{condition}) AS #{@manifold_config["metrics"]["countif"]}"
36
+ end
37
+
38
+ def add_sum_metrics(metrics, condition)
39
+ @manifold_config.dig("metrics", "sumif")&.each do |name, config|
40
+ metrics << "SUM(IF(#{condition}, #{config["field"]}, 0)) AS #{name}"
41
+ end
42
+ end
43
+
44
+ def build_context_condition(_name, config)
45
+ return config unless config.is_a?(Hash)
46
+
47
+ operator = config["operator"]
48
+ fields = config["fields"]
49
+ build_operator_condition(operator, fields)
50
+ end
51
+
52
+ def build_operator_condition(operator, fields)
53
+ conditions = fields.map { |f| @manifold_config["contexts"][f] }
54
+ case operator
55
+ when "AND", "OR" then join_conditions(conditions, operator)
56
+ when "NOT" then negate_condition(conditions.first)
57
+ when "NAND", "NOR" then negate_joined_conditions(conditions, operator[1..])
58
+ when "XOR" then build_xor_condition(conditions)
59
+ when "XNOR" then build_xnor_condition(conditions)
60
+ else config
61
+ end
62
+ end
63
+
64
+ def join_conditions(conditions, operator)
65
+ conditions.join(" #{operator} ")
66
+ end
67
+
68
+ def negate_condition(condition)
69
+ "NOT (#{condition})"
70
+ end
71
+
72
+ def negate_joined_conditions(conditions, operator)
73
+ "NOT (#{join_conditions(conditions, operator)})"
74
+ end
75
+
76
+ def build_xor_condition(conditions)
77
+ "(#{conditions[0]} AND NOT #{conditions[1]}) OR (NOT #{conditions[0]} AND #{conditions[1]})"
78
+ end
79
+
80
+ def build_xnor_condition(conditions)
81
+ "(#{conditions[0]} AND #{conditions[1]}) OR (NOT #{conditions[0]} AND NOT #{conditions[1]})"
82
+ end
83
+ end
84
+
85
+ # Handles building SQL for manifold routines
86
+ class SQLBuilder
87
+ def initialize(name, manifold_config)
88
+ @name = name
89
+ @manifold_config = manifold_config
90
+ end
91
+
92
+ def build_manifold_merge_sql(_metrics_builder, &)
93
+ return "" unless valid_config?
94
+
95
+ <<~SQL
96
+ MERGE #{@name}.Manifold AS target USING (
97
+ #{build_metrics_cte(&)}
98
+ #{build_final_select}
99
+ ) AS source
100
+ ON source.id = target.id AND source.timestamp = target.timestamp
101
+ #{build_merge_actions}
102
+ SQL
103
+ end
104
+
105
+ def build_dimensions_merge_sql(source_sql)
106
+ <<~SQL
107
+ MERGE #{@name}.Dimensions AS TARGET
108
+ USING (
109
+ #{source_sql}
110
+ ) AS source
111
+ ON source.id = target.id
112
+ WHEN MATCHED THEN UPDATE SET target.dimensions = source.dimensions
113
+ WHEN NOT MATCHED THEN INSERT ROW;
114
+ SQL
115
+ end
116
+
117
+ private
118
+
119
+ def valid_config?
120
+ source_table && timestamp_field
121
+ end
122
+
123
+ def build_metrics_cte(&)
124
+ <<~SQL
125
+ WITH Metrics AS (
126
+ #{build_metrics_select(&)}
127
+ )
128
+ SQL
129
+ end
130
+
131
+ def build_metrics_select(&block)
132
+ <<~SQL
133
+ SELECT
134
+ dimensions.id id,
135
+ TIMESTAMP_TRUNC(#{timestamp_field}, #{interval}) timestamp,
136
+ STRUCT(
137
+ #{block.call}
138
+ ) AS metrics
139
+ FROM `#{source_table}`
140
+ #{where_clause}
141
+ GROUP BY 1, 2
142
+ SQL
143
+ end
144
+
145
+ def build_final_select
146
+ <<~SQL
147
+ SELECT id, timestamp, #{@name}.Dimensions.dimensions, Metrics.metrics
148
+ FROM Metrics
149
+ LEFT JOIN #{@name}.Dimensions USING (id)
150
+ SQL
151
+ end
152
+
153
+ def build_merge_actions
154
+ <<~SQL
155
+ WHEN MATCHED THEN
156
+ UPDATE SET
157
+ metrics = source.metrics,
158
+ dimensions = source.dimensions
159
+ WHEN NOT MATCHED THEN
160
+ INSERT ROW;
161
+ SQL
162
+ end
163
+
164
+ def source_table
165
+ @manifold_config["source"]
166
+ end
167
+
168
+ def interval
169
+ @manifold_config&.dig("timestamp", "interval") || "DAY"
170
+ end
171
+
172
+ def where_clause
173
+ return "" unless @manifold_config["filter"]
174
+
175
+ "WHERE #{@manifold_config["filter"]}"
176
+ end
177
+
178
+ def timestamp_field
179
+ @manifold_config&.dig("timestamp", "field")
180
+ end
181
+ end
182
+
183
+ # Handles building table configurations
184
+ class TableConfigBuilder
185
+ def initialize(name)
186
+ @name = name
187
+ end
188
+
189
+ def build_table_configs
190
+ {
191
+ "dimensions" => dimensions_table_config,
192
+ "manifold" => manifold_table_config
193
+ }
194
+ end
195
+
196
+ private
197
+
198
+ def dimensions_table_config
199
+ build_table_config("Dimensions")
200
+ end
201
+
202
+ def manifold_table_config
203
+ build_table_config("Manifold")
204
+ end
205
+
206
+ def build_table_config(table_id)
207
+ {
208
+ "dataset_id" => @name,
209
+ "project" => "${var.project_id}",
210
+ "table_id" => table_id,
211
+ "schema" => "${file(\"${path.module}/tables/#{table_id.downcase}.json\")}",
212
+ "depends_on" => ["google_bigquery_dataset.#{@name}"]
213
+ }
214
+ end
215
+ end
216
+
5
217
  # Represents a Terraform configuration for a Manifold workspace.
6
218
  class WorkspaceConfiguration < Configuration
7
219
  attr_reader :name
220
+ attr_writer :merge_config, :manifold_config
8
221
 
9
222
  def initialize(name)
10
223
  super()
@@ -17,14 +230,12 @@ module Manifold
17
230
  @vectors << vector_config
18
231
  end
19
232
 
20
- attr_writer :merge_config
21
-
22
233
  def as_json
23
234
  {
24
235
  "variable" => variables_block,
25
236
  "resource" => {
26
237
  "google_bigquery_dataset" => dataset_config,
27
- "google_bigquery_table" => table_config,
238
+ "google_bigquery_table" => TableConfigBuilder.new(name).build_table_configs,
28
239
  "google_bigquery_routine" => routine_config
29
240
  }.compact
30
241
  }
@@ -51,68 +262,68 @@ module Manifold
51
262
  }
52
263
  end
53
264
 
54
- def table_config
55
- {
56
- "dimensions" => dimensions_table_config,
57
- "manifold" => manifold_table_config
58
- }
59
- end
265
+ def routine_config
266
+ routines = {
267
+ "merge_dimensions" => dimensions_routine_attributes,
268
+ "merge_manifold" => manifold_routine_attributes
269
+ }.compact
60
270
 
61
- def dimensions_table_config
62
- {
63
- "dataset_id" => name,
64
- "project" => "${var.project_id}",
65
- "table_id" => "Dimensions",
66
- "schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
67
- "depends_on" => ["google_bigquery_dataset.#{name}"]
68
- }
271
+ routines.empty? ? nil : routines
69
272
  end
70
273
 
71
- def manifold_table_config
274
+ def dimensions_routine_attributes
275
+ return nil if @vectors.empty? || @merge_config.nil?
276
+
72
277
  {
73
278
  "dataset_id" => name,
74
279
  "project" => "${var.project_id}",
75
- "table_id" => "Manifold",
76
- "schema" => "${file(\"${path.module}/tables/manifold.json\")}",
280
+ "routine_id" => "merge_dimensions",
281
+ "routine_type" => "PROCEDURE",
282
+ "language" => "SQL",
283
+ "definition_body" => dimensions_merge_routine,
77
284
  "depends_on" => ["google_bigquery_dataset.#{name}"]
78
285
  }
79
286
  end
80
287
 
81
- def routine_config
82
- return nil if @vectors.empty? || @merge_config.nil?
288
+ def dimensions_merge_routine
289
+ return "" if @vectors.empty? || @merge_config.nil?
83
290
 
84
- {
85
- "merge_dimensions" => routine_attributes
86
- }
291
+ source_sql = File.read(Pathname.pwd.join(@merge_config["source"]))
292
+ SQLBuilder.new(name, @manifold_config).build_dimensions_merge_sql(source_sql)
87
293
  end
88
294
 
89
- def routine_attributes
295
+ def manifold_routine_attributes
296
+ return nil unless valid_manifold_config?
297
+
90
298
  {
91
299
  "dataset_id" => name,
92
300
  "project" => "${var.project_id}",
93
- "routine_id" => "merge_dimensions",
301
+ "routine_id" => "merge_manifold",
94
302
  "routine_type" => "PROCEDURE",
95
303
  "language" => "SQL",
96
- "definition_body" => merge_routine_definition,
304
+ "definition_body" => manifold_merge_routine,
97
305
  "depends_on" => ["google_bigquery_dataset.#{name}"]
98
306
  }
99
307
  end
100
308
 
101
- def merge_routine_definition
102
- source_sql = read_source_sql(@merge_config["source"])
103
- <<~SQL
104
- MERGE #{name}.Dimensions AS TARGET
105
- USING (
106
- #{source_sql}
107
- ) AS source
108
- ON source.id = target.id
109
- WHEN MATCHED THEN UPDATE SET target.dimensions = source.dimensions
110
- WHEN NOT MATCHED THEN INSERT ROW;
111
- SQL
309
+ def manifold_merge_routine
310
+ metrics_builder = MetricsBuilder.new(@manifold_config)
311
+ sql_builder = SQLBuilder.new(name, @manifold_config)
312
+ sql_builder.build_manifold_merge_sql(metrics_builder) do
313
+ metrics_builder.build_metrics_struct
314
+ end
315
+ end
316
+
317
+ def valid_manifold_config?
318
+ return false unless @manifold_config
319
+
320
+ required_fields_present?
112
321
  end
113
322
 
114
- def read_source_sql(source_path)
115
- File.read(Pathname.pwd.join(source_path))
323
+ def required_fields_present?
324
+ %w[source timestamp.field contexts metrics].all? do |field|
325
+ @manifold_config&.dig(*field.split("."))
326
+ end
116
327
  end
117
328
  end
118
329
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Manifold
4
- VERSION = "0.0.15"
4
+ VERSION = "0.0.16"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: manifold-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.15
4
+ version: 0.0.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - claytongentry
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-02-09 00:00:00.000000000 Z
10
+ date: 2025-02-11 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: thor