manifold-cli 0.0.18 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 26f9112132a14f5bbb3cd123dc4d399ed9d618ec41638eaf43b5bbb0f870b6dd
4
- data.tar.gz: cc8bc857ca5163f2e86ffe8656fe48b8fb46c3295bfa7f3ffad531597e0b15df
3
+ metadata.gz: 72cd86649418feb395a957902e3e966e3df9fdc2931729beeb6719b0a98e831b
4
+ data.tar.gz: 399938be40f27388f0d9c742b922d5ab5909cc132e76c9f0a793e89ed2f4277b
5
5
  SHA512:
6
- metadata.gz: dba055e83e3ef141fd49c8af5d7079d33a81ac6a54539c32270e86ecbd73840ee183f611e99f7c985c1b7c5cbb7bd32b2ac650abc8faa85946600a2ddf10af1b
7
- data.tar.gz: d44010ad67f25f6ee9bd4762ee34a319062ebb82d68af81192bdba8becebad6850fd65e3e5e12f628114dd19ba17378bbecf196320e226613bb29e4062b3a80d
6
+ metadata.gz: a6b9919146b5e9cce7e32b423cc9119c6371a95fa5b4733d72ac217c23fc91ba95f95c472225afbb577a5bc8a359c01f26cbe2dcbc705b9792f402bcb88d6d0a
7
+ data.tar.gz: 691783ca633a17ae13f0488fcd69c64521dae189cbc139d1f37a011fb65dd2d1c9fadfc43903d998b82acbbb61d52c94336962229f3b69739d5050b9e1dc51be
@@ -0,0 +1,162 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Manifold
4
+ module API
5
+ # Handles schema generation and writing for Manifold tables
6
+ # rubocop:disable Metrics/ClassLength
7
+ class SchemaManager
8
+ def initialize(name, vectors, vector_service, manifold_yaml, logger)
9
+ @name = name
10
+ @vectors = vectors
11
+ @vector_service = vector_service
12
+ @manifold_yaml = manifold_yaml
13
+ @logger = logger
14
+ end
15
+
16
+ # Generates and writes schemas to the specified directory
17
+ def write_schemas(tables_directory)
18
+ tables_directory.mkpath
19
+ write_dimensions_schema(tables_directory)
20
+ write_manifold_schema(tables_directory)
21
+ write_metrics_schemas(tables_directory)
22
+ end
23
+
24
+ # Returns the dimensions schema structure
25
+ def dimensions_schema
26
+ [
27
+ { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
28
+ { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
29
+ "fields" => dimensions_fields }
30
+ ]
31
+ end
32
+
33
+ # Returns the manifold schema structure
34
+ def manifold_schema
35
+ [
36
+ { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
37
+ { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" },
38
+ { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
39
+ "fields" => dimensions_fields },
40
+ { "type" => "RECORD", "name" => "metrics", "mode" => "REQUIRED",
41
+ "fields" => metrics_fields }
42
+ ]
43
+ end
44
+
45
+ private
46
+
47
+ def write_dimensions_schema(tables_directory)
48
+ dimensions_path = tables_directory.join("dimensions.json")
49
+ dimensions_path.write(dimensions_schema_json.concat("\n"))
50
+ end
51
+
52
+ def write_manifold_schema(tables_directory)
53
+ manifold_path = tables_directory.join("manifold.json")
54
+ manifold_path.write(manifold_schema_json.concat("\n"))
55
+ end
56
+
57
+ def write_metrics_schemas(tables_directory)
58
+ return unless @manifold_yaml["metrics"]
59
+
60
+ # Create metrics subdirectory
61
+ metrics_directory = tables_directory.join("metrics")
62
+ metrics_directory.mkpath
63
+
64
+ @manifold_yaml["metrics"].each do |group_name, group_config|
65
+ metrics_table_path = metrics_directory.join("#{group_name}.json")
66
+ metrics_table_schema = metrics_table_schema(group_name, group_config)
67
+ metrics_table_path.write(JSON.pretty_generate(metrics_table_schema).concat("\n"))
68
+ @logger.info("Generated metrics table schema for '#{group_name}'.")
69
+ end
70
+ end
71
+
72
+ def metrics_table_schema(group_name, group_config)
73
+ [
74
+ { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
75
+ { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" },
76
+ { "type" => "RECORD", "name" => "metrics", "mode" => "REQUIRED",
77
+ "fields" => [metrics_group_field(group_name, group_config)] }
78
+ ]
79
+ end
80
+
81
+ def metrics_group_field(group_name, group_config)
82
+ {
83
+ "name" => group_name,
84
+ "type" => "RECORD",
85
+ "mode" => "NULLABLE",
86
+ "fields" => group_metrics_fields(group_config)
87
+ }
88
+ end
89
+
90
+ def dimensions_fields
91
+ @dimensions_fields ||= @vectors.filter_map do |vector|
92
+ @logger.info("Loading vector schema for '#{vector}'.")
93
+ @vector_service.load_vector_schema(vector)
94
+ end
95
+ end
96
+
97
+ def dimensions_schema_json
98
+ JSON.pretty_generate(dimensions_schema)
99
+ end
100
+
101
+ def manifold_schema_json
102
+ JSON.pretty_generate(manifold_schema)
103
+ end
104
+
105
+ def metrics_fields
106
+ return [] unless @manifold_yaml["metrics"]
107
+
108
+ @manifold_yaml["metrics"].map do |group_name, group_config|
109
+ {
110
+ "name" => group_name,
111
+ "type" => "RECORD",
112
+ "mode" => "NULLABLE",
113
+ "fields" => group_metrics_fields(group_config)
114
+ }
115
+ end
116
+ end
117
+
118
+ def group_metrics_fields(group_config)
119
+ return [] unless group_config["breakouts"] && group_config["aggregations"]
120
+
121
+ group_config["breakouts"].map do |breakout_name, _breakout_config|
122
+ {
123
+ "name" => breakout_name,
124
+ "type" => "RECORD",
125
+ "mode" => "NULLABLE",
126
+ "fields" => breakout_metrics_fields(group_config)
127
+ }
128
+ end
129
+ end
130
+
131
+ def breakout_metrics_fields(group_config)
132
+ [
133
+ *countif_fields(group_config),
134
+ *sumif_fields(group_config)
135
+ ]
136
+ end
137
+
138
+ def countif_fields(group_config)
139
+ return [] unless group_config.dig("aggregations", "countif")
140
+
141
+ [{
142
+ "name" => group_config["aggregations"]["countif"],
143
+ "type" => "INTEGER",
144
+ "mode" => "NULLABLE"
145
+ }]
146
+ end
147
+
148
+ def sumif_fields(group_config)
149
+ return [] unless group_config.dig("aggregations", "sumif")
150
+
151
+ group_config["aggregations"]["sumif"].keys.map do |metric_name|
152
+ {
153
+ "name" => metric_name,
154
+ "type" => "INTEGER",
155
+ "mode" => "NULLABLE"
156
+ }
157
+ end
158
+ end
159
+ end
160
+ # rubocop:enable Metrics/ClassLength
161
+ end
162
+ end
@@ -25,85 +25,6 @@ module Manifold
25
25
  end
26
26
  end
27
27
 
28
- # Handles SQL generation for manifold workspaces
29
- class SqlGenerator
30
- def initialize(name, manifold_yaml)
31
- @name = name
32
- @manifold_yaml = manifold_yaml
33
- end
34
-
35
- def generate_dimensions_merge_sql(source_sql)
36
- return unless valid_dimensions_config?
37
-
38
- sql_builder = Terraform::SQLBuilder.new(@name, @manifold_yaml)
39
- sql_builder.build_dimensions_merge_sql(source_sql)
40
- end
41
-
42
- private
43
-
44
- def valid_dimensions_config?
45
- return false unless @manifold_yaml
46
-
47
- !@manifold_yaml["dimensions"]&.dig("merge", "source").nil?
48
- end
49
- end
50
-
51
- # Handles schema file generation for manifold workspaces
52
- class SchemaWriter
53
- def initialize(name, vectors, vector_service, manifold_yaml, logger)
54
- @name = name
55
- @vectors = vectors
56
- @vector_service = vector_service
57
- @manifold_yaml = manifold_yaml
58
- @logger = logger
59
- end
60
-
61
- def write_schemas(tables_directory)
62
- tables_directory.mkpath
63
- write_dimensions_schema(tables_directory)
64
- write_manifold_schema(tables_directory)
65
- end
66
-
67
- private
68
-
69
- def write_dimensions_schema(tables_directory)
70
- dimensions_path = tables_directory.join("dimensions.json")
71
- dimensions_path.write(dimensions_schema_json.concat("\n"))
72
- end
73
-
74
- def write_manifold_schema(tables_directory)
75
- manifold_path = tables_directory.join("manifold.json")
76
- manifold_path.write(manifold_schema_json.concat("\n"))
77
- end
78
-
79
- def schema_generator
80
- @schema_generator ||= SchemaGenerator.new(dimensions_fields, @manifold_yaml)
81
- end
82
-
83
- def manifold_schema
84
- schema_generator.manifold_schema
85
- end
86
-
87
- def dimensions_schema
88
- schema_generator.dimensions_schema
89
- end
90
-
91
- def dimensions_fields
92
- @dimensions_fields ||= @vectors.filter_map do |vector|
93
- @logger.info("Loading vector schema for '#{vector}'.")
94
- @vector_service.load_vector_schema(vector)
95
- end
96
- end
97
-
98
- def dimensions_schema_json
99
- JSON.pretty_generate(dimensions_schema)
100
- end
101
-
102
- def manifold_schema_json
103
- JSON.pretty_generate(manifold_schema)
104
- end
105
- end
106
-
107
28
  # Encapsulates a single manifold.
108
29
  class Workspace
109
30
  attr_reader :name, :template_path, :logger
@@ -131,7 +52,7 @@ module Manifold
131
52
  def generate(with_terraform: false)
132
53
  return nil unless manifold_exists? && any_vectors?
133
54
 
134
- generate_schemas
55
+ write_schemas
135
56
  logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
136
57
 
137
58
  return unless with_terraform
@@ -177,21 +98,20 @@ module Manifold
177
98
  end
178
99
 
179
100
  def write_dimensions_merge_sql
180
- return unless dimensions_merge_source_exists?
101
+ return unless valid_dimensions_config?
181
102
 
182
- sql = generate_dimensions_merge_sql
103
+ source_sql = File.read(Pathname.pwd.join(manifold_yaml["dimensions"]["merge"]["source"]))
104
+ sql_builder = Terraform::SQLBuilder.new(name, manifold_yaml)
105
+ sql = sql_builder.build_dimensions_merge_sql(source_sql)
183
106
  return unless sql
184
107
 
185
108
  write_dimensions_merge_sql_file(sql)
186
109
  end
187
110
 
188
- def dimensions_merge_source_exists?
189
- manifold_yaml["dimensions"]&.dig("merge", "source")
190
- end
111
+ def valid_dimensions_config?
112
+ return false unless manifold_yaml
191
113
 
192
- def generate_dimensions_merge_sql
193
- source_sql = File.read(Pathname.pwd.join(manifold_yaml["dimensions"]["merge"]["source"]))
194
- SqlGenerator.new(name, manifold_yaml).generate_dimensions_merge_sql(source_sql)
114
+ !manifold_yaml["dimensions"]&.dig("merge", "source").nil?
195
115
  end
196
116
 
197
117
  def write_dimensions_merge_sql_file(sql)
@@ -213,9 +133,9 @@ module Manifold
213
133
  @manifold_yaml ||= YAML.safe_load_file(manifold_path)
214
134
  end
215
135
 
216
- def generate_schemas
217
- SchemaWriter.new(name, vectors, @vector_service, manifold_yaml, logger)
218
- .write_schemas(tables_directory)
136
+ def write_schemas
137
+ SchemaManager.new(name, vectors, @vector_service, manifold_yaml, logger)
138
+ .write_schemas(tables_directory)
219
139
  end
220
140
 
221
141
  def any_vectors?
@@ -24,5 +24,4 @@ metrics:
24
24
  sequenceSum:
25
25
  field: context.sequence
26
26
 
27
- source: my_project.render_metrics
28
27
  filter: timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY)
@@ -6,7 +6,7 @@ module Manifold
6
6
  class ProjectConfiguration < Configuration
7
7
  attr_reader :workspaces, :provider_version, :skip_provider_config
8
8
 
9
- DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION = "6.18.1"
9
+ DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION = "6.24.0"
10
10
 
11
11
  def initialize(workspaces, provider_version: DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION,
12
12
  skip_provider_config: false)
@@ -2,50 +2,11 @@
2
2
 
3
3
  module Manifold
4
4
  module Terraform
5
- # Handles building metrics SQL for manifold routines
6
- class MetricsSQLBuilder
7
- def initialize(name, manifold_config)
8
- @name = name
9
- @manifold_config = manifold_config
10
- end
11
-
12
- def build_metrics_select
13
- <<~SQL
14
- SELECT
15
- id,
16
- timestamp,
17
- #{build_metrics_struct}
18
- FROM #{build_metric_joins}
19
- SQL
20
- end
21
-
22
- private
23
-
24
- def build_metrics_struct
25
- metric_groups = @manifold_config["metrics"].keys
26
- metric_groups.map { |group| "#{group}.metrics #{group}" }.join(",\n ")
27
- end
28
-
29
- def build_metric_joins
30
- metric_groups = @manifold_config["metrics"]
31
- joins = metric_groups.map { |group, config| "#{config["source"]} AS #{group}" }
32
- first = joins.shift
33
- return first if joins.empty?
34
-
35
- "#{first}\n #{joins.map { |table| "FULL OUTER JOIN #{table} USING (id, timestamp)" }.join("\n ")}"
36
- end
37
-
38
- def timestamp_field
39
- @manifold_config&.dig("timestamp", "field")
40
- end
41
- end
42
-
43
5
  # Handles building SQL for manifold routines
44
6
  class SQLBuilder
45
7
  def initialize(name, manifold_config)
46
8
  @name = name
47
9
  @manifold_config = manifold_config
48
- @metrics_builder = MetricsSQLBuilder.new(name, manifold_config)
49
10
  end
50
11
 
51
12
  def build_manifold_merge_sql
@@ -75,22 +36,21 @@ module Manifold
75
36
  private
76
37
 
77
38
  def valid_config?
78
- source_table && timestamp_field && @manifold_config["metrics"]
79
- end
80
-
81
- def source_table
82
- first_group = @manifold_config["metrics"]&.values&.first
83
- first_group&.dig("source")
39
+ timestamp_field && @manifold_config["metrics"] && !@manifold_config["metrics"].empty?
84
40
  end
85
41
 
86
42
  def timestamp_field
87
43
  @manifold_config&.dig("timestamp", "field")
88
44
  end
89
45
 
46
+ def metrics_table_name(group_name)
47
+ "#{group_name.capitalize}Metrics"
48
+ end
49
+
90
50
  def build_source_query
91
51
  <<~SQL
92
52
  WITH Metrics AS (
93
- #{@metrics_builder.build_metrics_select}
53
+ #{build_metrics_select}
94
54
  )
95
55
 
96
56
  SELECT
@@ -117,23 +77,65 @@ module Manifold
117
77
  INSERT ROW;
118
78
  SQL
119
79
  end
80
+
81
+ # Metrics SQL building methods
82
+ def build_metrics_select
83
+ <<~SQL
84
+ SELECT
85
+ id,
86
+ timestamp,
87
+ #{build_metrics_struct}
88
+ FROM #{build_metric_joins}
89
+ SQL
90
+ end
91
+
92
+ def build_metrics_struct
93
+ metric_groups = @manifold_config["metrics"].keys
94
+ metric_groups.map { |group| "#{group}.metrics #{group}" }.join(",\n ")
95
+ end
96
+
97
+ def build_metric_joins
98
+ metric_groups = @manifold_config["metrics"]
99
+ joins = metric_groups.map do |group, config|
100
+ table = "#{@name}.#{metrics_table_name(group)}"
101
+ filter = config["filter"] ? " WHERE #{config["filter"]}" : ""
102
+ "(SELECT * FROM #{table}#{filter}) AS #{group}"
103
+ end
104
+ first = joins.shift
105
+ return first if joins.empty?
106
+
107
+ "#{first}\n #{joins.map { |table| "FULL OUTER JOIN #{table} USING (id, timestamp)" }.join("\n ")}"
108
+ end
120
109
  end
121
110
 
122
111
  # Handles building table configurations
123
112
  class TableConfigBuilder
124
- def initialize(name)
113
+ def initialize(name, manifold_config = nil)
125
114
  @name = name
115
+ @manifold_config = manifold_config
126
116
  end
127
117
 
128
118
  def build_table_configs
129
- {
119
+ configs = {
130
120
  "dimensions" => dimensions_table_config,
131
121
  "manifold" => manifold_table_config
132
122
  }
123
+
124
+ if @manifold_config&.dig("metrics")
125
+ @manifold_config["metrics"].each_key do |group_name|
126
+ configs[metrics_table_name(group_name).downcase] = metrics_table_config(group_name)
127
+ end
128
+ end
129
+
130
+ configs
133
131
  end
134
132
 
135
133
  private
136
134
 
135
+ def metrics_table_name(group_name)
136
+ "#{group_name.capitalize}Metrics"
137
+ end
138
+
137
139
  def dimensions_table_config
138
140
  build_table_config("Dimensions")
139
141
  end
@@ -142,12 +144,18 @@ module Manifold
142
144
  build_table_config("Manifold")
143
145
  end
144
146
 
145
- def build_table_config(table_id)
147
+ def metrics_table_config(group_name)
148
+ titlecased_name = metrics_table_name(group_name)
149
+ build_table_config(titlecased_name, "metrics/#{group_name}.json")
150
+ end
151
+
152
+ def build_table_config(table_id, schema_path = nil)
153
+ schema_path ||= "#{table_id.downcase}.json"
146
154
  {
147
155
  "dataset_id" => @name,
148
156
  "project" => "${var.project_id}",
149
157
  "table_id" => table_id,
150
- "schema" => "${file(\"${path.module}/tables/#{table_id.downcase}.json\")}",
158
+ "schema" => "${file(\"${path.module}/tables/#{schema_path}\")}",
151
159
  "depends_on" => ["google_bigquery_dataset.#{@name}"]
152
160
  }
153
161
  end
@@ -174,7 +182,7 @@ module Manifold
174
182
  "variable" => variables_block,
175
183
  "resource" => {
176
184
  "google_bigquery_dataset" => dataset_config,
177
- "google_bigquery_table" => TableConfigBuilder.new(name).build_table_configs,
185
+ "google_bigquery_table" => TableConfigBuilder.new(name, @manifold_config).build_table_configs,
178
186
  "google_bigquery_routine" => routine_config
179
187
  }.compact
180
188
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Manifold
4
- VERSION = "0.0.18"
4
+ VERSION = "0.1.0"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: manifold-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - claytongentry
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-02-19 00:00:00.000000000 Z
10
+ date: 2025-03-11 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: thor
@@ -43,7 +43,7 @@ files:
43
43
  - lib/manifold.rb
44
44
  - lib/manifold/api.rb
45
45
  - lib/manifold/api/project.rb
46
- - lib/manifold/api/schema_generator.rb
46
+ - lib/manifold/api/schema_manager.rb
47
47
  - lib/manifold/api/vector.rb
48
48
  - lib/manifold/api/workspace.rb
49
49
  - lib/manifold/cli.rb
@@ -1,89 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Manifold
4
- module API
5
- # Handles schema generation for Manifold tables
6
- class SchemaGenerator
7
- def initialize(dimensions_fields, manifold_yaml)
8
- @dimensions_fields = dimensions_fields
9
- @manifold_yaml = manifold_yaml
10
- end
11
-
12
- def dimensions_schema
13
- [
14
- { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
15
- { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
16
- "fields" => @dimensions_fields }
17
- ]
18
- end
19
-
20
- def manifold_schema
21
- [
22
- { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
23
- { "type" => "TIMESTAMP", "name" => "timestamp", "mode" => "REQUIRED" },
24
- { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
25
- "fields" => @dimensions_fields },
26
- { "type" => "RECORD", "name" => "metrics", "mode" => "REQUIRED",
27
- "fields" => metrics_fields }
28
- ]
29
- end
30
-
31
- private
32
-
33
- def metrics_fields
34
- return [] unless @manifold_yaml["metrics"]
35
-
36
- @manifold_yaml["metrics"].map do |group_name, group_config|
37
- {
38
- "name" => group_name,
39
- "type" => "RECORD",
40
- "mode" => "NULLABLE",
41
- "fields" => group_metrics_fields(group_config)
42
- }
43
- end
44
- end
45
-
46
- def group_metrics_fields(group_config)
47
- return [] unless group_config["breakouts"] && group_config["aggregations"]
48
-
49
- group_config["breakouts"].map do |breakout_name, _breakout_config|
50
- {
51
- "name" => breakout_name,
52
- "type" => "RECORD",
53
- "mode" => "NULLABLE",
54
- "fields" => breakout_metrics_fields(group_config)
55
- }
56
- end
57
- end
58
-
59
- def breakout_metrics_fields(group_config)
60
- [
61
- *countif_fields(group_config),
62
- *sumif_fields(group_config)
63
- ]
64
- end
65
-
66
- def countif_fields(group_config)
67
- return [] unless group_config.dig("aggregations", "countif")
68
-
69
- [{
70
- "name" => group_config["aggregations"]["countif"],
71
- "type" => "INTEGER",
72
- "mode" => "NULLABLE"
73
- }]
74
- end
75
-
76
- def sumif_fields(group_config)
77
- return [] unless group_config.dig("aggregations", "sumif")
78
-
79
- group_config["aggregations"]["sumif"].keys.map do |metric_name|
80
- {
81
- "name" => metric_name,
82
- "type" => "INTEGER",
83
- "mode" => "NULLABLE"
84
- }
85
- end
86
- end
87
- end
88
- end
89
- end