manifold-cli 0.0.14 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 38d11b038b09c8e1f83ef07ff4b238fcb07edaeeb1a36518a182a0963c5d5589
4
- data.tar.gz: 8ad9000c2ed33ff8a8324ce202b2c15b367b894de756099417479e9354750864
3
+ metadata.gz: 38a614204bb12682a02b29ea889a9f44a6aab5af58b746322ba607252f9ff272
4
+ data.tar.gz: 1ee65656d387eabc11fe6eb9effcf6119ec28210d32efc7bf0b8ae57ea37c43b
5
5
  SHA512:
6
- metadata.gz: 79a51d73c4d63c7fc02e13336e1cdd51be4d93023eac402134e1f3be4ecf067d016813b9a533318f37708469ad001a568abdb436b96af3ccace25db0f50ce1d5
7
- data.tar.gz: 4143c27f33fbe4b6a7bdc16282080a9c3cd9d5f507c0ded3db633b5a571bbb0a5a39659def1eb9342e8a711875a65c9daf000f15d6e54aaef53e38301de104b8
6
+ metadata.gz: 762a4575c8423177ccbd69622da95d034aa311f51733f4f6d123199ec35af390acee194ce543c907f127939ed6dae8c15de97d11183b2bb508b0b3a8f1195e02
7
+ data.tar.gz: 359ae735a6be1fec84b46a0410395feaaef0b12fee3df69f887cfc7723b57d58341dc1684ea93f401b91b5e7510e1bb085175af2bc193578471f72f4b85c955a
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Manifold
4
+ module API
5
# Handles schema generation for Manifold tables.
#
# Produces arrays of field-descriptor hashes ("type", "name", "mode" and,
# for records, "fields") for the dimensions table and the manifold table.
class SchemaGenerator
  VALID_OPERATORS = %w[AND OR NOT NAND NOR XOR XNOR].freeze

  # @param dimensions_fields [Array] field descriptors nested under the
  #   "dimensions" record of both schemas
  # @param manifold_yaml [Hash, nil] parsed manifold configuration. A missing
  #   (nil/false) configuration is treated as empty so schema generation
  #   yields "no metrics" instead of raising NoMethodError.
  def initialize(dimensions_fields, manifold_yaml)
    @dimensions_fields = dimensions_fields
    @manifold_yaml = manifold_yaml || {}
  end

  # @return [Array<Hash>] the "id" and "dimensions" field descriptors
  def dimensions_schema
    [id_field, dimensions_field]
  end

  # @return [Array<Hash>] the "id", "timestamp", "dimensions" and "metrics"
  #   field descriptors, in that order
  def manifold_schema
    [
      id_field,
      { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" },
      dimensions_field,
      { "type" => "RECORD", "name" => "metrics", "mode" => "REQUIRED",
        "fields" => metrics_fields }
    ]
  end

  private

  # Descriptor of the required string identifier shared by both schemas.
  def id_field
    { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" }
  end

  # Descriptor of the required "dimensions" record shared by both schemas.
  def dimensions_field
    { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
      "fields" => @dimensions_fields }
  end

  # One nullable record per configured context; each record carries the same
  # metric fields. Empty unless both "contexts" and "metrics" are configured.
  def metrics_fields
    contexts = @manifold_yaml["contexts"]
    return [] unless contexts && @manifold_yaml["metrics"]

    contexts.map do |context_name, _context_config|
      {
        "name" => context_name,
        "type" => "RECORD",
        "mode" => "NULLABLE",
        # Built per context so the records never share one mutable array.
        "fields" => context_metrics_fields
      }
    end
  end

  # Metric fields present in every context record: countif first, then sumif.
  def context_metrics_fields
    countif_fields + sumif_fields
  end

  # Single INTEGER field named after the "metrics.countif" setting, if any.
  def countif_fields
    countif_name = @manifold_yaml.dig("metrics", "countif")
    return [] unless countif_name

    [{ "name" => countif_name, "type" => "INTEGER", "mode" => "NULLABLE" }]
  end

  # One INTEGER field per key of the "metrics.sumif" mapping, if any.
  def sumif_fields
    sumif = @manifold_yaml.dig("metrics", "sumif")
    return [] unless sumif

    sumif.keys.map do |metric_name|
      { "name" => metric_name, "type" => "INTEGER", "mode" => "NULLABLE" }
    end
  end

  # Raises ArgumentError unless +operator+ is one of VALID_OPERATORS.
  # NOTE(review): no method in this class calls this helper.
  def validate_operator!(operator)
    return if VALID_OPERATORS.include?(operator)

    raise ArgumentError, "Invalid operator: #{operator}. Valid operators are: #{VALID_OPERATORS.join(", ")}"
  end
end
83
+ end
84
+ end
@@ -2,6 +2,29 @@
2
2
 
3
3
  module Manifold
4
4
  module API
5
# Handles terraform configuration generation.
#
# Collects the vector configurations of a workspace into a
# Terraform::WorkspaceConfiguration and writes it to a path.
class TerraformGenerator
  # Manifold configuration handed to the workspace configuration. Initialised
  # from the constructor argument and honoured by #generate, so reassigning it
  # before calling #generate takes effect.
  attr_accessor :manifold_config

  # @param name [String] workspace name passed to the configuration
  # @param vectors [Enumerable] vector identifiers to load
  # @param vector_service [#load_vector_config] loader for vector configs
  # @param manifold_yaml [Hash, nil] parsed manifold configuration
  def initialize(name, vectors, vector_service, manifold_yaml)
    @name = name
    @vectors = vectors
    @vector_service = vector_service
    @manifold_yaml = manifold_yaml
    @manifold_config = manifold_yaml
  end

  # Builds the workspace configuration and writes it to +path+.
  #
  # @param path [Object] destination forwarded to the configuration's #write
  # @return whatever the configuration's #write returns
  def generate(path)
    config = Terraform::WorkspaceConfiguration.new(@name)
    @vectors.each do |vector|
      config.add_vector(@vector_service.load_vector_config(vector))
    end

    # A missing (nil/false) manifold config simply has no "dimensions" section.
    dimensions = (manifold_config || {})["dimensions"]
    config.merge_config = dimensions.fetch("merge", nil) if dimensions
    config.manifold_config = manifold_config
    config.write(path)
  end
end
27
+
5
28
  # Encapsulates a single manifold.
6
29
  class Workspace
7
30
  attr_reader :name, :template_path, :logger
@@ -27,11 +50,17 @@ module Manifold
27
50
  end
28
51
 
29
52
  def generate(with_terraform: false)
30
- return unless manifold_exists? && any_vectors?
53
+ return nil unless manifold_exists? && any_vectors?
31
54
 
55
+ tables_directory.mkpath
32
56
  generate_dimensions
33
- generate_terraform if with_terraform
57
+ generate_manifold
34
58
  logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
59
+
60
+ return unless with_terraform
61
+
62
+ generate_terraform
63
+ logger.info("Generated Terraform configuration for workspace '#{name}'.")
35
64
  end
36
65
 
37
66
  def tables_directory
@@ -74,16 +103,28 @@ module Manifold
74
103
  dimensions_path.write(dimensions_schema_json.concat("\n"))
75
104
  end
76
105
 
106
+ def generate_manifold
107
+ manifold_schema_path.write(manifold_schema_json.concat("\n"))
108
+ end
109
+
110
+ def manifold_schema_path
111
+ tables_directory.join("manifold.json")
112
+ end
113
+
114
+ def schema_generator
115
+ @schema_generator ||= SchemaGenerator.new(dimensions_fields, manifold_yaml)
116
+ end
117
+
118
+ def manifold_schema
119
+ schema_generator.manifold_schema
120
+ end
121
+
77
122
  def dimensions_schema
78
- [
79
- { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
80
- { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
81
- "fields" => dimensions_fields }
82
- ]
123
+ schema_generator.dimensions_schema
83
124
  end
84
125
 
85
126
  def dimensions_fields
86
- vectors.filter_map do |vector|
127
+ @dimensions_fields ||= vectors.filter_map do |vector|
87
128
  logger.info("Loading vector schema for '#{vector}'.")
88
129
  @vector_service.load_vector_schema(vector)
89
130
  end
@@ -106,13 +147,13 @@ module Manifold
106
147
  end
107
148
 
108
149
  def generate_terraform
109
- config = Terraform::WorkspaceConfiguration.new(name)
110
- vectors.each do |vector|
111
- vector_config = @vector_service.load_vector_config(vector)
112
- config.add_vector(vector_config)
113
- end
114
- config.merge_config = manifold_yaml["dimensions"]&.fetch("merge", nil) if manifold_yaml["dimensions"]
115
- config.write(terraform_main_path)
150
+ terraform_generator = TerraformGenerator.new(name, vectors, @vector_service, manifold_yaml)
151
+ terraform_generator.manifold_config = manifold_yaml
152
+ terraform_generator.generate(terraform_main_path)
153
+ end
154
+
155
+ def manifold_schema_json
156
+ JSON.pretty_generate(manifold_schema)
116
157
  end
117
158
  end
118
159
  end
@@ -8,23 +8,24 @@ dimensions:
8
8
  merge:
9
9
  source: lib/views/select_my_vector.sql
10
10
 
11
- metrics:
12
- - name: # Add your metric name here, e.g. Pageviews
13
-
14
- id:
15
- field: # Identify the field that uniquely identifies each manifold vector
16
- type: # Specify the type of that field, e.g. INTEGER
11
+ timestamp:
12
+ interval: HOUR
13
+ field: timestamp
17
14
 
18
- interval:
19
- type: # Specify the interval type, e.g. TIMESTAMP or DATE
20
- expression: # Compute the interval for the entry, e.g. TIMESTAMP_TRUNC(timestamp, HOUR)
15
+ contexts:
16
+ paid: IS_PAID(context.location)
17
+ organic: IS_ORGANIC(context.location)
18
+ paidOrganic:
19
+ fields:
20
+ - paid
21
+ - organic
22
+ operator: AND
21
23
 
22
- aggregations:
23
- # Add any aggregations this metric should present
24
+ metrics:
25
+ countif: tapCount
26
+ sumif:
27
+ sequenceSum:
28
+ field: context.sequence
24
29
 
25
- source:
26
- type: BIGQUERY_TABLE
27
- project: # Add your project name here
28
- dataset: # Add your dataset name here
29
- table: # Add your table name
30
- filter: # (optional) Add your filter condition here
30
+ source: my_project.my_dataset.my_table
31
+ filter: timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY)
@@ -2,9 +2,222 @@
2
2
 
3
3
  module Manifold
4
4
  module Terraform
5
# Handles building metrics SQL for manifold routines.
#
# Turns the "contexts" and "metrics" sections of a manifold configuration
# into one "STRUCT(...) AS <context>" expression per context.
class MetricsBuilder
  # Operators accepted in a composite context's "operator" setting.
  SUPPORTED_OPERATORS = %w[AND OR NOT NAND NOR XOR XNOR].freeze

  # @param manifold_config [Hash, nil] parsed manifold configuration
  def initialize(manifold_config)
    @manifold_config = manifold_config
  end

  # @return [String] the context structs joined by ",\n", or "" when the
  #   configuration lacks "contexts" or "metrics"
  # @raise [ArgumentError] if a composite context names an unsupported operator
  def build_metrics_struct
    return "" unless @manifold_config&.dig("contexts") && @manifold_config&.dig("metrics")

    context_structs = @manifold_config["contexts"].map do |name, config|
      condition = build_context_condition(name, config)
      "STRUCT(#{build_context_metrics(condition)}) AS #{name}"
    end

    context_structs.join(",\n")
  end

  private

  # Metric expressions for one context condition: countif first, then sumif.
  def build_context_metrics(condition)
    metrics = []
    add_count_metrics(metrics, condition)
    add_sum_metrics(metrics, condition)
    metrics.join(",\n")
  end

  # Appends the COUNTIF expression when "metrics.countif" is configured.
  def add_count_metrics(metrics, condition)
    countif_name = @manifold_config.dig("metrics", "countif")
    return unless countif_name

    metrics << "COUNTIF(#{condition}) AS #{countif_name}"
  end

  # Appends one SUM(IF(...)) expression per "metrics.sumif" entry.
  def add_sum_metrics(metrics, condition)
    @manifold_config.dig("metrics", "sumif")&.each do |name, config|
      metrics << "SUM(IF(#{condition}, #{config["field"]}, 0)) AS #{name}"
    end
  end

  # A non-Hash context is used verbatim as the condition; a Hash context
  # combines other contexts' conditions through its "operator".
  def build_context_condition(_name, config)
    return config unless config.is_a?(Hash)

    build_operator_condition(config["operator"], config["fields"])
  end

  # Combines the conditions of the referenced contexts with +operator+.
  # Conditions are interpolated verbatim (no extra parentheses are added
  # around the individual operands).
  def build_operator_condition(operator, fields)
    conditions = fields.map { |field| @manifold_config["contexts"][field] }
    case operator
    when "AND", "OR" then join_conditions(conditions, operator)
    when "NOT" then negate_condition(conditions.first)
    # "NAND" -> "AND", "NOR" -> "OR": negate the joined positive form.
    when "NAND", "NOR" then negate_joined_conditions(conditions, operator[1..])
    when "XOR" then build_xor_condition(conditions)
    when "XNOR" then build_xnor_condition(conditions)
    else
      # Previously this branch referenced an undefined local and raised
      # NameError; fail with an explicit, descriptive error instead.
      raise ArgumentError,
            "Invalid operator: #{operator.inspect}. Valid operators are: #{SUPPORTED_OPERATORS.join(", ")}"
    end
  end

  def join_conditions(conditions, operator)
    conditions.join(" #{operator} ")
  end

  def negate_condition(condition)
    "NOT (#{condition})"
  end

  def negate_joined_conditions(conditions, operator)
    "NOT (#{join_conditions(conditions, operator)})"
  end

  # Exclusive-or of the first two conditions.
  def build_xor_condition(conditions)
    "(#{conditions[0]} AND NOT #{conditions[1]}) OR (NOT #{conditions[0]} AND #{conditions[1]})"
  end

  # Negated exclusive-or (both true or both false) of the first two conditions.
  def build_xnor_condition(conditions)
    "(#{conditions[0]} AND #{conditions[1]}) OR (NOT #{conditions[0]} AND NOT #{conditions[1]})"
  end
end
84
+
85
# Handles building SQL for manifold routines.
#
# Generates the MERGE statements for the "<name>.Manifold" and
# "<name>.Dimensions" tables from a manifold configuration.
class SQLBuilder
  # @param name [String] dataset name used to qualify the target tables
  # @param manifold_config [Hash, nil] parsed manifold configuration. A missing
  #   (nil/false) configuration is treated as empty so every lookup is
  #   nil-safe and #build_manifold_merge_sql returns "" instead of raising.
  def initialize(name, manifold_config)
    @name = name
    @manifold_config = manifold_config || {}
  end

  # Builds the MERGE statement that refreshes the Manifold table.
  #
  # @param _metrics_builder [Object] unused; kept for interface compatibility
  # @yieldreturn [String] the SQL placed inside the metrics STRUCT(...)
  # @return [String] the MERGE statement, or "" when the configuration lacks
  #   "source" or "timestamp.field"
  def build_manifold_merge_sql(_metrics_builder, &block)
    return "" unless valid_config?

    <<~SQL
      MERGE #{@name}.Manifold AS target USING (
      #{build_metrics_cte(&block)}
      #{build_final_select}
      ) AS source
      ON source.id = target.id AND source.timestamp = target.timestamp
      #{build_merge_actions}
    SQL
  end

  # Builds the MERGE statement that refreshes the Dimensions table.
  #
  # @param source_sql [String] query embedded verbatim as the merge source
  # @return [String] the MERGE statement
  def build_dimensions_merge_sql(source_sql)
    <<~SQL
      MERGE #{@name}.Dimensions AS TARGET
      USING (
      #{source_sql}
      ) AS source
      ON source.id = target.id
      WHEN MATCHED THEN UPDATE SET target.dimensions = source.dimensions
      WHEN NOT MATCHED THEN INSERT ROW;
    SQL
  end

  private

  # Truthy only when both the source table and the timestamp field are set.
  def valid_config?
    source_table && timestamp_field
  end

  # Wraps the metrics SELECT in a "Metrics" common table expression.
  def build_metrics_cte(&block)
    <<~SQL
      WITH Metrics AS (
      #{build_metrics_select(&block)}
      )
    SQL
  end

  # Aggregates the source rows per id and truncated timestamp; the block
  # supplies the contents of the metrics STRUCT.
  def build_metrics_select(&block)
    <<~SQL
      SELECT
      dimensions.id id,
      TIMESTAMP_TRUNC(#{timestamp_field}, #{interval}) timestamp,
      STRUCT(
      #{block.call}
      ) AS metrics
      FROM `#{source_table}`
      #{where_clause}
      GROUP BY 1, 2
    SQL
  end

  # Joins the aggregated metrics with the stored dimensions.
  def build_final_select
    <<~SQL
      SELECT id, timestamp, #{@name}.Dimensions.dimensions, Metrics.metrics
      FROM Metrics
      LEFT JOIN #{@name}.Dimensions USING (id)
    SQL
  end

  # Update-or-insert actions of the manifold MERGE.
  def build_merge_actions
    <<~SQL
      WHEN MATCHED THEN
      UPDATE SET
      metrics = source.metrics,
      dimensions = source.dimensions
      WHEN NOT MATCHED THEN
      INSERT ROW;
    SQL
  end

  def source_table
    @manifold_config["source"]
  end

  # Truncation interval; defaults to "DAY" when not configured.
  def interval
    @manifold_config.dig("timestamp", "interval") || "DAY"
  end

  # "WHERE <filter>" when a filter is configured, otherwise "".
  def where_clause
    filter = @manifold_config["filter"]
    filter ? "WHERE #{filter}" : ""
  end

  def timestamp_field
    @manifold_config.dig("timestamp", "field")
  end
end
182
+
183
# Handles building table configurations.
#
# Describes the "Dimensions" and "Manifold" tables of a dataset as hashes
# suitable for the workspace's table resource section.
class TableConfigBuilder
  # @param name [String] dataset name the tables belong to
  def initialize(name)
    @name = name
  end

  # @return [Hash{String => Hash}] table attributes keyed by "dimensions"
  #   and "manifold", in that order
  def build_table_configs
    %w[Dimensions Manifold].to_h do |table_id|
      [table_id.downcase, build_table_config(table_id)]
    end
  end

  private

  # Attributes for a single table; the schema is read from
  # tables/<lower-cased table id>.json relative to the module path.
  def build_table_config(table_id)
    {
      "dataset_id" => @name,
      "project" => "${var.project_id}",
      "table_id" => table_id,
      "schema" => "${file(\"${path.module}/tables/#{table_id.downcase}.json\")}",
      "depends_on" => ["google_bigquery_dataset.#{@name}"]
    }
  end
end
216
+
5
217
  # Represents a Terraform configuration for a Manifold workspace.
6
218
  class WorkspaceConfiguration < Configuration
7
219
  attr_reader :name
220
+ attr_writer :merge_config, :manifold_config
8
221
 
9
222
  def initialize(name)
10
223
  super()
@@ -17,14 +230,12 @@ module Manifold
17
230
  @vectors << vector_config
18
231
  end
19
232
 
20
- attr_writer :merge_config
21
-
22
233
  def as_json
23
234
  {
24
235
  "variable" => variables_block,
25
236
  "resource" => {
26
237
  "google_bigquery_dataset" => dataset_config,
27
- "google_bigquery_table" => table_config,
238
+ "google_bigquery_table" => TableConfigBuilder.new(name).build_table_configs,
28
239
  "google_bigquery_routine" => routine_config
29
240
  }.compact
30
241
  }
@@ -51,53 +262,68 @@ module Manifold
51
262
  }
52
263
  end
53
264
 
54
- def table_config
55
- {
56
- "dimensions" => {
57
- "dataset_id" => name,
58
- "project" => "${var.project_id}",
59
- "table_id" => "Dimensions",
60
- "schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
61
- "depends_on" => ["google_bigquery_dataset.#{name}"]
62
- }
63
- }
265
+ def routine_config
266
+ routines = {
267
+ "merge_dimensions" => dimensions_routine_attributes,
268
+ "merge_manifold" => manifold_routine_attributes
269
+ }.compact
270
+
271
+ routines.empty? ? nil : routines
64
272
  end
65
273
 
66
- def routine_config
274
+ def dimensions_routine_attributes
67
275
  return nil if @vectors.empty? || @merge_config.nil?
68
276
 
69
277
  {
70
- "merge_dimensions" => routine_attributes
278
+ "dataset_id" => name,
279
+ "project" => "${var.project_id}",
280
+ "routine_id" => "merge_dimensions",
281
+ "routine_type" => "PROCEDURE",
282
+ "language" => "SQL",
283
+ "definition_body" => dimensions_merge_routine,
284
+ "depends_on" => ["google_bigquery_dataset.#{name}"]
71
285
  }
72
286
  end
73
287
 
74
- def routine_attributes
288
+ def dimensions_merge_routine
289
+ return "" if @vectors.empty? || @merge_config.nil?
290
+
291
+ source_sql = File.read(Pathname.pwd.join(@merge_config["source"]))
292
+ SQLBuilder.new(name, @manifold_config).build_dimensions_merge_sql(source_sql)
293
+ end
294
+
295
+ def manifold_routine_attributes
296
+ return nil unless valid_manifold_config?
297
+
75
298
  {
76
299
  "dataset_id" => name,
77
300
  "project" => "${var.project_id}",
78
- "routine_id" => "merge_dimensions",
301
+ "routine_id" => "merge_manifold",
79
302
  "routine_type" => "PROCEDURE",
80
303
  "language" => "SQL",
81
- "definition_body" => merge_routine_definition,
304
+ "definition_body" => manifold_merge_routine,
82
305
  "depends_on" => ["google_bigquery_dataset.#{name}"]
83
306
  }
84
307
  end
85
308
 
86
- def merge_routine_definition
87
- source_sql = read_source_sql(@merge_config["source"])
88
- <<~SQL
89
- MERGE #{name}.Dimensions AS TARGET
90
- USING (
91
- #{source_sql}
92
- ) AS source
93
- ON source.id = target.id
94
- WHEN MATCHED THEN UPDATE SET target.dimensions = source.dimensions
95
- WHEN NOT MATCHED THEN INSERT ROW;
96
- SQL
309
+ def manifold_merge_routine
310
+ metrics_builder = MetricsBuilder.new(@manifold_config)
311
+ sql_builder = SQLBuilder.new(name, @manifold_config)
312
+ sql_builder.build_manifold_merge_sql(metrics_builder) do
313
+ metrics_builder.build_metrics_struct
314
+ end
315
+ end
316
+
317
+ def valid_manifold_config?
318
+ return false unless @manifold_config
319
+
320
+ required_fields_present?
97
321
  end
98
322
 
99
- def read_source_sql(source_path)
100
- File.read(Pathname.pwd.join(source_path))
323
+ def required_fields_present?
324
+ %w[source timestamp.field contexts metrics].all? do |field|
325
+ @manifold_config&.dig(*field.split("."))
326
+ end
101
327
  end
102
328
  end
103
329
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Manifold
4
- VERSION = "0.0.14"
4
+ VERSION = "0.0.16"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: manifold-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - claytongentry
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-02-07 00:00:00.000000000 Z
10
+ date: 2025-02-11 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: thor
@@ -43,6 +43,7 @@ files:
43
43
  - lib/manifold.rb
44
44
  - lib/manifold/api.rb
45
45
  - lib/manifold/api/project.rb
46
+ - lib/manifold/api/schema_generator.rb
46
47
  - lib/manifold/api/vector.rb
47
48
  - lib/manifold/api/workspace.rb
48
49
  - lib/manifold/cli.rb