manifold-cli 0.0.13 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 68f9ecd2e683b236126eb7d1bed9c9b8db446a551032976e64328596cf7e198b
4
- data.tar.gz: '0929cfaabd84490c3e13f5ca9737244dc7fa9834767fb101560c26a49b69047f'
3
+ metadata.gz: 465e6236b9114a9a9170502e55646ae094a392ee25bb9ce29af81046fdb1b386
4
+ data.tar.gz: b8dc973ee3caf57fcde8919b93580c2278850b90aaa1df82425f9a819fc66a2d
5
5
  SHA512:
6
- metadata.gz: 9cd0137b20b465bcf95117566a8d1509a3edfafeb8dc6865bc1da847cd5854f954b83674365e1fd2b394f60fbd9dba9baef2c6336d3ec6f703f9f16b41a857ad
7
- data.tar.gz: 0aea89c6924dbc65a2628b720e60175493299b794cd406904965109ac76bc45da1fa0fa560a70def0edfdf0e657d722987c1e67631029670aaf58425004d4389
6
+ metadata.gz: 69d18cf361bdbf945685aab0264c3222c7c692e45141e3e10352238523f180df0b35f97689bff3bb71712897ddb52fb1f037c8a951762fa1818d6d28a560944b
7
+ data.tar.gz: f8e29a241c6f76a6632e5e47c421ac03d0736c1af3d7e8e2a374026d30f76aded46c02359aaa1a0d29b720c9bdc65355ed4e44a761cfe1af549d730988982e77
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
module Manifold
  module API
    # Handles schema generation for Manifold tables.
    #
    # Produces schema definitions as arrays of string-keyed hashes
    # ("type" / "name" / "mode" / "fields") for the dimensions table and the
    # manifold table, based on the supplied dimension fields and the parsed
    # manifold configuration.
    class SchemaGenerator
      VALID_OPERATORS = %w[AND OR NOT NAND NOR XOR XNOR].freeze

      # @param dimensions_fields [Array<Hash>] field definitions nested under
      #   the "dimensions" record of both schemas
      # @param manifold_yaml [Hash, nil] parsed manifold configuration. A nil
      #   or false value (presumably what an empty YAML document parses to)
      #   is treated as an empty configuration instead of raising
      #   NoMethodError later on.
      def initialize(dimensions_fields, manifold_yaml)
        @dimensions_fields = dimensions_fields
        @manifold_yaml = manifold_yaml || {}
      end

      # Schema of the dimensions table: a required id plus the dimensions record.
      #
      # @return [Array<Hash>]
      def dimensions_schema
        [
          { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
          { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
            "fields" => @dimensions_fields }
        ]
      end

      # Schema of the manifold table: id, timestamp, the dimensions record and
      # a metrics record holding one nested record per configured context.
      #
      # @return [Array<Hash>]
      def manifold_schema
        [
          { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
          { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" },
          { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
            "fields" => @dimensions_fields },
          { "type" => "RECORD", "name" => "metrics", "mode" => "REQUIRED",
            "fields" => metrics_fields }
        ]
      end

      private

      # One nullable record per entry under "contexts"; every context carries
      # the same metric fields. Empty unless both "contexts" and "metrics" are
      # configured.
      def metrics_fields
        return [] unless @manifold_yaml["contexts"] && @manifold_yaml["metrics"]

        @manifold_yaml["contexts"].map do |context_name, _context_config|
          {
            "name" => context_name,
            "type" => "RECORD",
            "mode" => "NULLABLE",
            "fields" => context_metrics_fields
          }
        end
      end

      # Metric fields of a single context: the countif field (if any) followed
      # by the sumif fields.
      def context_metrics_fields
        [
          *countif_fields,
          *sumif_fields
        ]
      end

      # A single INTEGER field named after the value of metrics.countif.
      def countif_fields
        return [] unless @manifold_yaml.dig("metrics", "countif")

        [{
          "name" => @manifold_yaml["metrics"]["countif"],
          "type" => "INTEGER",
          "mode" => "NULLABLE"
        }]
      end

      # One INTEGER field per key under metrics.sumif.
      def sumif_fields
        return [] unless @manifold_yaml.dig("metrics", "sumif")

        @manifold_yaml["metrics"]["sumif"].keys.map do |metric_name|
          {
            "name" => metric_name,
            "type" => "INTEGER",
            "mode" => "NULLABLE"
          }
        end
      end

      # Raises ArgumentError unless +operator+ is one of VALID_OPERATORS.
      # NOTE(review): nothing in this class calls this yet; presumably it is
      # intended for the "operator" key of composite contexts - confirm.
      def validate_operator!(operator)
        return if VALID_OPERATORS.include?(operator)

        raise ArgumentError, "Invalid operator: #{operator}. Valid operators are: #{VALID_OPERATORS.join(", ")}"
      end
    end
  end
end
@@ -2,6 +2,26 @@
2
2
 
3
3
  module Manifold
4
4
  module API
5
# Handles terraform configuration generation.
#
# Collects the configuration of every vector into a
# Terraform::WorkspaceConfiguration, attaches the optional dimensions merge
# configuration and writes the result to disk.
class TerraformGenerator
  # @param name [String] workspace name passed to the workspace configuration
  # @param vectors [Enumerable] vector identifiers to load configs for
  # @param vector_service [#load_vector_config] loader for vector configs
  # @param manifold_yaml [Hash, nil] parsed manifold configuration. A nil or
  #   false value (presumably an empty YAML document) is treated as an empty
  #   configuration rather than raising NoMethodError in #generate.
  def initialize(name, vectors, vector_service, manifold_yaml)
    @name = name
    @vectors = vectors
    @vector_service = vector_service
    @manifold_yaml = manifold_yaml || {}
  end

  # Builds the workspace configuration and writes it to +path+.
  #
  # @param path [Object] destination handed to the configuration's #write
  # @return whatever the configuration's #write returns
  def generate(path)
    config = Terraform::WorkspaceConfiguration.new(@name)
    @vectors.each do |vector|
      config.add_vector(@vector_service.load_vector_config(vector))
    end

    # Only touch merge_config when a "dimensions" section exists; a section
    # without a "merge" key assigns nil, exactly as before.
    dimensions = @manifold_yaml["dimensions"]
    config.merge_config = dimensions.fetch("merge", nil) if dimensions
    config.write(path)
  end
end
24
+
5
25
  # Encapsulates a single manifold.
6
26
  class Workspace
7
27
  attr_reader :name, :template_path, :logger
@@ -27,11 +47,17 @@ module Manifold
27
47
  end
28
48
 
29
49
  def generate(with_terraform: false)
30
- return unless manifold_exists? && any_vectors?
50
+ return nil unless manifold_exists? && any_vectors?
31
51
 
52
+ tables_directory.mkpath
32
53
  generate_dimensions
33
- generate_terraform if with_terraform
54
+ generate_manifold
34
55
  logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
56
+
57
+ return unless with_terraform
58
+
59
+ generate_terraform
60
+ logger.info("Generated Terraform configuration for workspace '#{name}'.")
35
61
  end
36
62
 
37
63
  def tables_directory
@@ -74,16 +100,28 @@ module Manifold
74
100
  dimensions_path.write(dimensions_schema_json.concat("\n"))
75
101
  end
76
102
 
103
+ def generate_manifold
104
+ manifold_schema_path.write(manifold_schema_json.concat("\n"))
105
+ end
106
+
107
+ def manifold_schema_path
108
+ tables_directory.join("manifold.json")
109
+ end
110
+
111
+ def schema_generator
112
+ @schema_generator ||= SchemaGenerator.new(dimensions_fields, manifold_yaml)
113
+ end
114
+
115
+ def manifold_schema
116
+ schema_generator.manifold_schema
117
+ end
118
+
77
119
  def dimensions_schema
78
- [
79
- { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
80
- { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
81
- "fields" => dimensions_fields }
82
- ]
120
+ schema_generator.dimensions_schema
83
121
  end
84
122
 
85
123
  def dimensions_fields
86
- vectors.filter_map do |vector|
124
+ @dimensions_fields ||= vectors.filter_map do |vector|
87
125
  logger.info("Loading vector schema for '#{vector}'.")
88
126
  @vector_service.load_vector_schema(vector)
89
127
  end
@@ -106,12 +144,12 @@ module Manifold
106
144
  end
107
145
 
108
146
  def generate_terraform
109
- config = Terraform::WorkspaceConfiguration.new(name)
110
- vectors.each do |vector|
111
- vector_config = @vector_service.load_vector_config(vector)
112
- config.add_vector(vector_config)
113
- end
114
- config.write(terraform_main_path)
147
+ terraform_generator = TerraformGenerator.new(name, vectors, @vector_service, manifold_yaml)
148
+ terraform_generator.generate(terraform_main_path)
149
+ end
150
+
151
+ def manifold_schema_json
152
+ JSON.pretty_generate(manifold_schema)
115
153
  end
116
154
  end
117
155
  end
@@ -9,7 +9,3 @@ attributes:
9
9
  # key_values:
10
10
  # first_key: STRING
11
11
  # second_key: STRING:REQUIRED
12
-
13
- # Optionally, reference a view specifying how to select vector dimensions
14
- # merge:
15
- # source: lib/views/select_my_vector.sql
@@ -3,23 +3,29 @@ vectors:
3
3
  # Example:
4
4
  # - User
5
5
 
6
- metrics:
7
- - name: # Add your metric name here, e.g. Pageviews
6
+ dimensions:
7
+ # Reference a view specifying how to select vector dimensions
8
+ merge:
9
+ source: lib/views/select_my_vector.sql
8
10
 
9
- id:
10
- field: # Identify the field that uniquely identifies each manifold vector
11
- type: # Specify the type of that field, e.g. INTEGER
11
+ timestamp:
12
+ interval: HOUR
13
+ field: timestamp
12
14
 
13
- interval:
14
- type: # Specify the interval type, e.g. TIMESTAMP or DATE
15
- expression: # Compute the interval for the entry, e.g. TIMESTAMP_TRUNC(timestamp, HOUR)
15
+ contexts:
16
+ paid: IS_PAID(context.location)
17
+ organic: IS_ORGANIC(context.location)
18
+ paidOrganic:
19
+ fields:
20
+ - paid
21
+ - organic
22
+ operator: AND
16
23
 
17
- aggregations:
18
- # Add any aggregations this metric should present
24
+ metrics:
25
+ countif: tapCount
26
+ sumif:
27
+ sequenceSum:
28
+ field: context.sequence
19
29
 
20
- source:
21
- type: BIGQUERY_TABLE
22
- project: # Add your project name here
23
- dataset: # Add your dataset name here
24
- table: # Add your table name
25
- filter: # (optional) Add your filter condition here
30
+ source: my_project.my_dataset.my_table
31
+ filter: timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY)
@@ -10,12 +10,15 @@ module Manifold
10
10
  super()
11
11
  @name = name
12
12
  @vectors = []
13
+ @merge_config = nil
13
14
  end
14
15
 
15
16
  def add_vector(vector_config)
16
17
  @vectors << vector_config
17
18
  end
18
19
 
20
+ attr_writer :merge_config
21
+
19
22
  def as_json
20
23
  {
21
24
  "variable" => variables_block,
@@ -50,51 +53,60 @@ module Manifold
50
53
 
51
54
  def table_config
52
55
  {
53
- "dimensions" => {
54
- "dataset_id" => name,
55
- "project" => "${var.project_id}",
56
- "table_id" => "Dimensions",
57
- "schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
58
- "depends_on" => ["google_bigquery_dataset.#{name}"]
59
- }
56
+ "dimensions" => dimensions_table_config,
57
+ "manifold" => manifold_table_config
60
58
  }
61
59
  end
62
60
 
63
- def routine_config
64
- return nil if @vectors.empty?
61
+ def dimensions_table_config
62
+ {
63
+ "dataset_id" => name,
64
+ "project" => "${var.project_id}",
65
+ "table_id" => "Dimensions",
66
+ "schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
67
+ "depends_on" => ["google_bigquery_dataset.#{name}"]
68
+ }
69
+ end
65
70
 
66
- routines = @vectors.filter_map { |vector| build_routine(vector) }
67
- routines.empty? ? nil : routines.to_h
71
+ def manifold_table_config
72
+ {
73
+ "dataset_id" => name,
74
+ "project" => "${var.project_id}",
75
+ "table_id" => "Manifold",
76
+ "schema" => "${file(\"${path.module}/tables/manifold.json\")}",
77
+ "depends_on" => ["google_bigquery_dataset.#{name}"]
78
+ }
68
79
  end
69
80
 
70
- def build_routine(vector)
71
- return nil unless vector["merge"]&.fetch("source", nil)
81
+ def routine_config
82
+ return nil if @vectors.empty? || @merge_config.nil?
72
83
 
73
- routine_name = "merge_#{vector["name"].downcase}_dimensions"
74
- [routine_name, routine_attributes(routine_name, vector)]
84
+ {
85
+ "merge_dimensions" => routine_attributes
86
+ }
75
87
  end
76
88
 
77
- def routine_attributes(routine_name, vector)
89
+ def routine_attributes
78
90
  {
79
91
  "dataset_id" => name,
80
92
  "project" => "${var.project_id}",
81
- "routine_id" => routine_name,
93
+ "routine_id" => "merge_dimensions",
82
94
  "routine_type" => "PROCEDURE",
83
95
  "language" => "SQL",
84
- "definition_body" => merge_routine_definition(vector),
96
+ "definition_body" => merge_routine_definition,
85
97
  "depends_on" => ["google_bigquery_dataset.#{name}"]
86
98
  }
87
99
  end
88
100
 
89
- def merge_routine_definition(vector)
90
- source_sql = read_source_sql(vector["merge"]["source"])
101
+ def merge_routine_definition
102
+ source_sql = read_source_sql(@merge_config["source"])
91
103
  <<~SQL
92
104
  MERGE #{name}.Dimensions AS TARGET
93
105
  USING (
94
106
  #{source_sql}
95
107
  ) AS source
96
108
  ON source.id = target.id
97
- WHEN MATCHED THEN UPDATE SET target.#{vector["name"].downcase} = source.dimensions
109
+ WHEN MATCHED THEN UPDATE SET target.dimensions = source.dimensions
98
110
  WHEN NOT MATCHED THEN INSERT ROW;
99
111
  SQL
100
112
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Manifold
4
- VERSION = "0.0.13"
4
+ VERSION = "0.0.15"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: manifold-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - claytongentry
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-02-07 00:00:00.000000000 Z
10
+ date: 2025-02-09 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: thor
@@ -43,6 +43,7 @@ files:
43
43
  - lib/manifold.rb
44
44
  - lib/manifold/api.rb
45
45
  - lib/manifold/api/project.rb
46
+ - lib/manifold/api/schema_generator.rb
46
47
  - lib/manifold/api/vector.rb
47
48
  - lib/manifold/api/workspace.rb
48
49
  - lib/manifold/cli.rb