manifold-cli 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0478f04fda1c307a346fb2e0a58f93949e73fadbf533b10d1bc374d3c9dd6b3
4
- data.tar.gz: b6b8fcd1cdc6291af4a01b2014d61257f7b4ac585f54e1f93beaabfc31b5a511
3
+ metadata.gz: 1eb276f49eed54c682bf10a9edc53c567a4e61c1dc2c7ee1b27e98596c91d81f
4
+ data.tar.gz: 3e9374eeb7c36b5a5cd99b17a1d168b363ee161c23ffcae3ec54a170401ae34f
5
5
  SHA512:
6
- metadata.gz: fb8477bd304089eb1d48c5b2f276333b5ef97d15c2eb10d154ae7ae52aee0531930ae4f5871a699abf4cefdcb1ff14cc70f3b6bf4bbfd619a6a4d8e8ab05c044
7
- data.tar.gz: 5dc53ac8f096d39b85f1bfd39a47b1c6d1ed2f28d4d2a8bb0e14e9c0e687e18ab3960161fe774e47c21f0322a253b561dee46d826b554e058bbb8cdd0265dab1
6
+ metadata.gz: d1760a9ee8d1f2a8c78521761168696e50d87d6ca1194feb68f6a4c8a552d112624ba9c5512226f042bacedee1ae490a55f2e8ec33179e2cc588e26eb8f8e302
7
+ data.tar.gz: 57ff9863d1ce37d21ac58cdc8b634449754106a2472a3d1bfbebd6332d302b1ec39377ffb5dad882091df5b500bbb654c0ea385e2a60190486f903701a4d8035
data/.rubocop.yml CHANGED
@@ -4,7 +4,7 @@ require: rubocop-rspec
4
4
 
5
5
  AllCops:
6
6
  NewCops: enable
7
- TargetRubyVersion: 3.1
7
+ TargetRubyVersion: 3.2
8
8
 
9
9
  Style/StringLiterals:
10
10
  EnforcedStyle: double_quotes
data/README.md CHANGED
@@ -61,7 +61,29 @@ manifold add <data_project_name>
61
61
  After you fill out the manifold.yml file, this command generates the necessary BigQuery schema files based on the specified dimensions and metrics.
62
62
 
63
63
  ```bash
64
- manifold generate <data_project_name> bq
64
+ manifold generate
65
+ ```
66
+
67
+ 4. **Generate Terraform Configuration (Optional)**
68
+
69
+ Manifold can optionally generate Terraform configurations for managing your BigQuery resources. To generate both BigQuery schemas and Terraform configurations, use the `--tf` flag:
70
+
71
+ ```bash
72
+ manifold generate --tf
73
+ ```
74
+
75
+ This will create:
76
+
77
+ - A root `main.tf.json` file that sets up the Google Cloud provider and workspace modules
78
+ - Individual workspace configurations in `workspaces/<workspace_name>/main.tf.json`
79
+ - Dataset and table definitions that reference your generated BigQuery schemas
80
+
81
+ The generated Terraform configurations use the Google Cloud provider and expect a `project_id` variable to be set. You can apply these configurations using standard Terraform commands:
82
+
83
+ ```bash
84
+ terraform init
85
+ terraform plan -var="project_id=your-project-id"
86
+ terraform apply -var="project_id=your-project-id"
65
87
  ```
66
88
 
67
89
  ## Manifold Configuration
@@ -22,8 +22,9 @@ module Manifold
22
22
  @workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger:) }
23
23
  end
24
24
 
25
- def generate
26
- workspaces.each(&:generate)
25
+ def generate(with_terraform: false)
26
+ workspaces.each { |w| w.generate(with_terraform:) }
27
+ generate_terraform_entrypoint if with_terraform
27
28
  end
28
29
 
29
30
  def workspaces_directory
@@ -39,6 +40,11 @@ module Manifold
39
40
  def workspace_directories
40
41
  workspaces_directory.children.select(&:directory?)
41
42
  end
43
+
44
+ def generate_terraform_entrypoint
45
+ config = Terraform::ProjectConfiguration.new(workspaces)
46
+ config.write(directory.join("main.tf.json"))
47
+ end
42
48
  end
43
49
  end
44
50
  end
@@ -26,10 +26,11 @@ module Manifold
26
26
  FileUtils.cp(template_path, manifold_path)
27
27
  end
28
28
 
29
- def generate
29
+ def generate(with_terraform: false)
30
30
  return unless manifold_exists? && any_vectors?
31
31
 
32
32
  generate_dimensions
33
+ generate_terraform if with_terraform
33
34
  logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
34
35
  end
35
36
 
@@ -55,6 +56,10 @@ module Manifold
55
56
  directory.join("manifold.yml")
56
57
  end
57
58
 
59
+ def terraform_main_path
60
+ directory.join("main.tf.json")
61
+ end
62
+
58
63
  private
59
64
 
60
65
  def directory
@@ -99,6 +104,15 @@ module Manifold
99
104
  def vectors
100
105
  manifold_yaml["vectors"]
101
106
  end
107
+
108
+ def generate_terraform
109
+ config = Terraform::WorkspaceConfiguration.new(name)
110
+ vectors.each do |vector|
111
+ vector_config = @vector_service.load_vector_config(vector)
112
+ config.add_vector(vector_config)
113
+ end
114
+ config.write(terraform_main_path)
115
+ end
102
116
  end
103
117
  end
104
118
  end
data/lib/manifold/cli.rb CHANGED
@@ -46,10 +46,12 @@ module Manifold
46
46
  end
47
47
 
48
48
  desc "generate", "Generate BigQuery schema for all workspaces in the project"
49
+ method_option :tf, type: :boolean, desc: "Generate Terraform configurations"
49
50
  def generate
50
- name = Pathname.pwd.basename.to_s
51
- project = API::Project.new(name, directory: Pathname.pwd, logger:)
52
- project.generate
51
+ path = Pathname.pwd
52
+ name = path.basename.to_s
53
+ project = API::Project.new(name, directory: path, logger:)
54
+ project.generate(with_terraform: options[:tf])
53
55
  logger.info "Generated BigQuery schema for all workspaces in the project."
54
56
  end
55
57
  end
@@ -19,6 +19,16 @@ module Manifold
19
19
  raise "Invalid YAML in vector configuration #{path}: #{e.message}"
20
20
  end
21
21
 
22
+ def load_vector_config(vector_name)
23
+ path = config_path(vector_name)
24
+ config = YAML.safe_load_file(path)
25
+ config.merge("name" => vector_name.downcase)
26
+ rescue Errno::ENOENT, Errno::EISDIR
27
+ raise "Vector configuration not found: #{path}"
28
+ rescue Psych::Exception => e
29
+ raise "Invalid YAML in vector configuration #{path}: #{e.message}"
30
+ end
31
+
22
32
  private
23
33
 
24
34
  def transform_attributes_to_schema(attributes)
@@ -5,3 +5,7 @@ attributes:
5
5
  # id: STRING
6
6
  # created_at: TIMESTAMP
7
7
  # status: STRING
8
+
9
+ # Optionally, reference a view specifying how to select vector dimensions
10
+ # merge:
11
+ # source: lib/views/select_my_vector.sql
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Manifold
4
+ module Terraform
5
+ # Provides a base class for Terraform configuration files.
6
+ class Configuration
7
+ def as_json
8
+ raise NotImplementedError, "#{self.class} must implement #as_json"
9
+ end
10
+
11
+ def write(path)
12
+ path.write("#{JSON.pretty_generate(as_json)}\n")
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Manifold
4
+ module Terraform
5
+ # Represents a Terraform configuration for a Manifold project.
6
+ class ProjectConfiguration < Configuration
7
+ attr_reader :workspaces, :provider_version
8
+
9
+ DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION = "6.18.1"
10
+
11
+ def initialize(workspaces, provider_version: DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION)
12
+ super()
13
+ @workspaces = workspaces
14
+ @provider_version = provider_version
15
+ end
16
+
17
+ def as_json
18
+ {
19
+ "terraform" => terraform_block,
20
+ "provider" => provider_block,
21
+ "variable" => variables_block,
22
+ "module" => workspace_modules
23
+ }
24
+ end
25
+
26
+ private
27
+
28
+ def terraform_block
29
+ {
30
+ "required_providers" => {
31
+ "google" => {
32
+ "source" => "hashicorp/google",
33
+ "version" => provider_version
34
+ }
35
+ }
36
+ }
37
+ end
38
+
39
+ def provider_block
40
+ {
41
+ "google" => {
42
+ "project" => "${var.project_id}"
43
+ }
44
+ }
45
+ end
46
+
47
+ def variables_block
48
+ {
49
+ "project_id" => {
50
+ "description" => "The GCP project ID where resources will be created",
51
+ "type" => "string"
52
+ }
53
+ }
54
+ end
55
+
56
+ def workspace_modules
57
+ workspaces.each_with_object({}) do |workspace, modules|
58
+ modules[workspace.name] = {
59
+ "source" => "./workspaces/#{workspace.name}",
60
+ "project_id" => "${var.project_id}"
61
+ }
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Manifold
4
+ module Terraform
5
+ # Represents a Terraform configuration for a Manifold workspace.
6
+ class WorkspaceConfiguration < Configuration
7
+ attr_reader :name
8
+
9
+ def initialize(name)
10
+ super()
11
+ @name = name
12
+ @vectors = []
13
+ end
14
+
15
+ def add_vector(vector_config)
16
+ @vectors << vector_config
17
+ end
18
+
19
+ def as_json
20
+ {
21
+ "variable" => variables_block,
22
+ "resource" => {
23
+ "google_bigquery_dataset" => dataset_config,
24
+ "google_bigquery_table" => table_config,
25
+ "google_bigquery_routine" => routine_config
26
+ }.compact
27
+ }
28
+ end
29
+
30
+ private
31
+
32
+ def variables_block
33
+ {
34
+ "project_id" => {
35
+ "description" => "The GCP project ID where resources will be created",
36
+ "type" => "string"
37
+ }
38
+ }
39
+ end
40
+
41
+ def dataset_config
42
+ {
43
+ name => {
44
+ "dataset_id" => name,
45
+ "project" => "${var.project_id}",
46
+ "location" => "US"
47
+ }
48
+ }
49
+ end
50
+
51
+ def table_config
52
+ {
53
+ "dimensions" => {
54
+ "dataset_id" => name,
55
+ "project" => "${var.project_id}",
56
+ "table_id" => "Dimensions",
57
+ "schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
58
+ "depends_on" => ["google_bigquery_dataset.#{name}"]
59
+ }
60
+ }
61
+ end
62
+
63
+ def routine_config
64
+ return nil if @vectors.empty?
65
+
66
+ routines = @vectors.filter_map { |vector| build_routine(vector) }
67
+ routines.empty? ? nil : routines.to_h
68
+ end
69
+
70
+ def build_routine(vector)
71
+ return nil unless vector["merge"]&.fetch("source", nil)
72
+
73
+ routine_name = "merge_#{vector["name"].downcase}_dimensions"
74
+ [routine_name, routine_attributes(routine_name, vector)]
75
+ end
76
+
77
+ def routine_attributes(routine_name, vector)
78
+ {
79
+ "dataset_id" => name,
80
+ "project" => "${var.project_id}",
81
+ "routine_id" => routine_name,
82
+ "routine_type" => "PROCEDURE",
83
+ "language" => "SQL",
84
+ "definition_body" => merge_routine_definition(vector),
85
+ "depends_on" => ["google_bigquery_dataset.#{name}"]
86
+ }
87
+ end
88
+
89
+ def merge_routine_definition(vector)
90
+ source_sql = read_source_sql(vector["merge"]["source"])
91
+ <<~SQL
92
+ MERGE #{name}.Dimensions AS TARGET
93
+ USING (
94
+ #{source_sql}
95
+ ) AS source
96
+ ON source.id = target.id
97
+ WHEN MATCHED THEN UPDATE SET target.#{vector["name"].downcase} = source.dimensions
98
+ WHEN NOT MATCHED THEN INSERT ROW;
99
+ SQL
100
+ end
101
+
102
+ def read_source_sql(source_path)
103
+ File.read(Pathname.pwd.join(source_path))
104
+ end
105
+ end
106
+ end
107
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Manifold
4
- VERSION = "0.0.8"
4
+ VERSION = "0.0.9"
5
5
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: manifold-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - claytongentry
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-11-22 00:00:00.000000000 Z
10
+ date: 2025-02-04 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: thor
@@ -24,7 +23,6 @@ dependencies:
24
23
  - - ">="
25
24
  - !ruby/object:Gem::Version
26
25
  version: '0'
27
- description:
28
26
  email:
29
27
  - clayton@bustle.com
30
28
  executables:
@@ -51,6 +49,9 @@ files:
51
49
  - lib/manifold/services/vector_service.rb
52
50
  - lib/manifold/templates/vector_template.yml
53
51
  - lib/manifold/templates/workspace_template.yml
52
+ - lib/manifold/terraform/configuration.rb
53
+ - lib/manifold/terraform/project_configuration.rb
54
+ - lib/manifold/terraform/workspace_configuration.rb
54
55
  - lib/manifold/version.rb
55
56
  - sig/manifold.rbs
56
57
  homepage: https://github.com/bustle/manifold
@@ -61,7 +62,6 @@ metadata:
61
62
  source_code_uri: https://github.com/bustle/manifold
62
63
  changelog_uri: https://github.com/bustle/manifold/CHANGELOG.md
63
64
  rubygems_mfa_required: 'true'
64
- post_install_message:
65
65
  rdoc_options: []
66
66
  require_paths:
67
67
  - lib
@@ -69,15 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
69
69
  requirements:
70
70
  - - ">="
71
71
  - !ruby/object:Gem::Version
72
- version: 3.1.0
72
+ version: 3.2.0
73
73
  required_rubygems_version: !ruby/object:Gem::Requirement
74
74
  requirements:
75
75
  - - ">="
76
76
  - !ruby/object:Gem::Version
77
77
  version: '0'
78
78
  requirements: []
79
- rubygems_version: 3.5.22
80
- signing_key:
79
+ rubygems_version: 3.6.2
81
80
  specification_version: 4
82
81
  summary: A CLI for managing data infrastructures in BigQuery
83
82
  test_files: []