manifold-cli 0.0.8 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/README.md +23 -1
- data/lib/manifold/api/project.rb +8 -2
- data/lib/manifold/api/workspace.rb +15 -1
- data/lib/manifold/cli.rb +5 -3
- data/lib/manifold/services/vector_service.rb +10 -0
- data/lib/manifold/templates/vector_template.yml +4 -0
- data/lib/manifold/terraform/configuration.rb +16 -0
- data/lib/manifold/terraform/project_configuration.rb +66 -0
- data/lib/manifold/terraform/workspace_configuration.rb +107 -0
- data/lib/manifold/version.rb +1 -1
- metadata +7 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1eb276f49eed54c682bf10a9edc53c567a4e61c1dc2c7ee1b27e98596c91d81f
|
4
|
+
data.tar.gz: 3e9374eeb7c36b5a5cd99b17a1d168b363ee161c23ffcae3ec54a170401ae34f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d1760a9ee8d1f2a8c78521761168696e50d87d6ca1194feb68f6a4c8a552d112624ba9c5512226f042bacedee1ae490a55f2e8ec33179e2cc588e26eb8f8e302
|
7
|
+
data.tar.gz: 57ff9863d1ce37d21ac58cdc8b634449754106a2472a3d1bfbebd6332d302b1ec39377ffb5dad882091df5b500bbb654c0ea385e2a60190486f903701a4d8035
|
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -61,7 +61,29 @@ manifold add <data_project_name>
|
|
61
61
|
After you fill out the manifold.yml file, this command generates the necessary BigQuery schema files based on the specified dimensions and metrics.
|
62
62
|
|
63
63
|
```bash
|
64
|
-
manifold generate
|
64
|
+
manifold generate
|
65
|
+
```
|
66
|
+
|
67
|
+
4. **Generate Terraform Configuration (Optional)**
|
68
|
+
|
69
|
+
Manifold can optionally generate Terraform configurations for managing your BigQuery resources. To generate both BigQuery schemas and Terraform configurations, use the `--tf` flag:
|
70
|
+
|
71
|
+
```bash
|
72
|
+
manifold generate --tf
|
73
|
+
```
|
74
|
+
|
75
|
+
This will create:
|
76
|
+
|
77
|
+
- A root `main.tf.json` file that sets up the Google Cloud provider and workspace modules
|
78
|
+
- Individual workspace configurations in `workspaces/<workspace_name>/main.tf.json`
|
79
|
+
- Dataset and table definitions that reference your generated BigQuery schemas
|
80
|
+
|
81
|
+
The generated Terraform configurations use the Google Cloud provider and expect a `project_id` variable to be set. You can apply these configurations using standard Terraform commands:
|
82
|
+
|
83
|
+
```bash
|
84
|
+
terraform init
|
85
|
+
terraform plan -var="project_id=your-project-id"
|
86
|
+
terraform apply -var="project_id=your-project-id"
|
65
87
|
```
|
66
88
|
|
67
89
|
## Manifold Configuration
|
data/lib/manifold/api/project.rb
CHANGED
@@ -22,8 +22,9 @@ module Manifold
|
|
22
22
|
@workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger:) }
|
23
23
|
end
|
24
24
|
|
25
|
-
def generate
|
26
|
-
workspaces.each(&:generate)
|
25
|
+
def generate(with_terraform: false)
|
26
|
+
workspaces.each { |w| w.generate(with_terraform:) }
|
27
|
+
generate_terraform_entrypoint if with_terraform
|
27
28
|
end
|
28
29
|
|
29
30
|
def workspaces_directory
|
@@ -39,6 +40,11 @@ module Manifold
|
|
39
40
|
def workspace_directories
|
40
41
|
workspaces_directory.children.select(&:directory?)
|
41
42
|
end
|
43
|
+
|
44
|
+
def generate_terraform_entrypoint
|
45
|
+
config = Terraform::ProjectConfiguration.new(workspaces)
|
46
|
+
config.write(directory.join("main.tf.json"))
|
47
|
+
end
|
42
48
|
end
|
43
49
|
end
|
44
50
|
end
|
@@ -26,10 +26,11 @@ module Manifold
|
|
26
26
|
FileUtils.cp(template_path, manifold_path)
|
27
27
|
end
|
28
28
|
|
29
|
-
def generate
|
29
|
+
def generate(with_terraform: false)
|
30
30
|
return unless manifold_exists? && any_vectors?
|
31
31
|
|
32
32
|
generate_dimensions
|
33
|
+
generate_terraform if with_terraform
|
33
34
|
logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
|
34
35
|
end
|
35
36
|
|
@@ -55,6 +56,10 @@ module Manifold
|
|
55
56
|
directory.join("manifold.yml")
|
56
57
|
end
|
57
58
|
|
59
|
+
def terraform_main_path
|
60
|
+
directory.join("main.tf.json")
|
61
|
+
end
|
62
|
+
|
58
63
|
private
|
59
64
|
|
60
65
|
def directory
|
@@ -99,6 +104,15 @@ module Manifold
|
|
99
104
|
def vectors
|
100
105
|
manifold_yaml["vectors"]
|
101
106
|
end
|
107
|
+
|
108
|
+
def generate_terraform
|
109
|
+
config = Terraform::WorkspaceConfiguration.new(name)
|
110
|
+
vectors.each do |vector|
|
111
|
+
vector_config = @vector_service.load_vector_config(vector)
|
112
|
+
config.add_vector(vector_config)
|
113
|
+
end
|
114
|
+
config.write(terraform_main_path)
|
115
|
+
end
|
102
116
|
end
|
103
117
|
end
|
104
118
|
end
|
data/lib/manifold/cli.rb
CHANGED
@@ -46,10 +46,12 @@ module Manifold
|
|
46
46
|
end
|
47
47
|
|
48
48
|
desc "generate", "Generate BigQuery schema for all workspaces in the project"
|
49
|
+
method_option :tf, type: :boolean, desc: "Generate Terraform configurations"
|
49
50
|
def generate
|
50
|
-
|
51
|
-
|
52
|
-
project.generate
|
51
|
+
path = Pathname.pwd
|
52
|
+
name = path.basename.to_s
|
53
|
+
project = API::Project.new(name, directory: path, logger:)
|
54
|
+
project.generate(with_terraform: options[:tf])
|
53
55
|
logger.info "Generated BigQuery schema for all workspaces in the project."
|
54
56
|
end
|
55
57
|
end
|
@@ -19,6 +19,16 @@ module Manifold
|
|
19
19
|
raise "Invalid YAML in vector configuration #{path}: #{e.message}"
|
20
20
|
end
|
21
21
|
|
22
|
+
def load_vector_config(vector_name)
|
23
|
+
path = config_path(vector_name)
|
24
|
+
config = YAML.safe_load_file(path)
|
25
|
+
config.merge("name" => vector_name.downcase)
|
26
|
+
rescue Errno::ENOENT, Errno::EISDIR
|
27
|
+
raise "Vector configuration not found: #{path}"
|
28
|
+
rescue Psych::Exception => e
|
29
|
+
raise "Invalid YAML in vector configuration #{path}: #{e.message}"
|
30
|
+
end
|
31
|
+
|
22
32
|
private
|
23
33
|
|
24
34
|
def transform_attributes_to_schema(attributes)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module Terraform
|
5
|
+
# Provides a base class for Terraform configuration files.
|
6
|
+
class Configuration
|
7
|
+
def as_json
|
8
|
+
raise NotImplementedError, "#{self.class} must implement #as_json"
|
9
|
+
end
|
10
|
+
|
11
|
+
def write(path)
|
12
|
+
path.write("#{JSON.pretty_generate(as_json)}\n")
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module Terraform
|
5
|
+
# Represents a Terraform configuration for a Manifold project.
|
6
|
+
class ProjectConfiguration < Configuration
|
7
|
+
attr_reader :workspaces, :provider_version
|
8
|
+
|
9
|
+
DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION = "6.18.1"
|
10
|
+
|
11
|
+
def initialize(workspaces, provider_version: DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION)
|
12
|
+
super()
|
13
|
+
@workspaces = workspaces
|
14
|
+
@provider_version = provider_version
|
15
|
+
end
|
16
|
+
|
17
|
+
def as_json
|
18
|
+
{
|
19
|
+
"terraform" => terraform_block,
|
20
|
+
"provider" => provider_block,
|
21
|
+
"variable" => variables_block,
|
22
|
+
"module" => workspace_modules
|
23
|
+
}
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def terraform_block
|
29
|
+
{
|
30
|
+
"required_providers" => {
|
31
|
+
"google" => {
|
32
|
+
"source" => "hashicorp/google",
|
33
|
+
"version" => provider_version
|
34
|
+
}
|
35
|
+
}
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
def provider_block
|
40
|
+
{
|
41
|
+
"google" => {
|
42
|
+
"project" => "${var.project_id}"
|
43
|
+
}
|
44
|
+
}
|
45
|
+
end
|
46
|
+
|
47
|
+
def variables_block
|
48
|
+
{
|
49
|
+
"project_id" => {
|
50
|
+
"description" => "The GCP project ID where resources will be created",
|
51
|
+
"type" => "string"
|
52
|
+
}
|
53
|
+
}
|
54
|
+
end
|
55
|
+
|
56
|
+
def workspace_modules
|
57
|
+
workspaces.each_with_object({}) do |workspace, modules|
|
58
|
+
modules[workspace.name] = {
|
59
|
+
"source" => "./workspaces/#{workspace.name}",
|
60
|
+
"project_id" => "${var.project_id}"
|
61
|
+
}
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module Terraform
|
5
|
+
# Represents a Terraform configuration for a Manifold workspace.
|
6
|
+
class WorkspaceConfiguration < Configuration
|
7
|
+
attr_reader :name
|
8
|
+
|
9
|
+
def initialize(name)
|
10
|
+
super()
|
11
|
+
@name = name
|
12
|
+
@vectors = []
|
13
|
+
end
|
14
|
+
|
15
|
+
def add_vector(vector_config)
|
16
|
+
@vectors << vector_config
|
17
|
+
end
|
18
|
+
|
19
|
+
def as_json
|
20
|
+
{
|
21
|
+
"variable" => variables_block,
|
22
|
+
"resource" => {
|
23
|
+
"google_bigquery_dataset" => dataset_config,
|
24
|
+
"google_bigquery_table" => table_config,
|
25
|
+
"google_bigquery_routine" => routine_config
|
26
|
+
}.compact
|
27
|
+
}
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def variables_block
|
33
|
+
{
|
34
|
+
"project_id" => {
|
35
|
+
"description" => "The GCP project ID where resources will be created",
|
36
|
+
"type" => "string"
|
37
|
+
}
|
38
|
+
}
|
39
|
+
end
|
40
|
+
|
41
|
+
def dataset_config
|
42
|
+
{
|
43
|
+
name => {
|
44
|
+
"dataset_id" => name,
|
45
|
+
"project" => "${var.project_id}",
|
46
|
+
"location" => "US"
|
47
|
+
}
|
48
|
+
}
|
49
|
+
end
|
50
|
+
|
51
|
+
def table_config
|
52
|
+
{
|
53
|
+
"dimensions" => {
|
54
|
+
"dataset_id" => name,
|
55
|
+
"project" => "${var.project_id}",
|
56
|
+
"table_id" => "Dimensions",
|
57
|
+
"schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
|
58
|
+
"depends_on" => ["google_bigquery_dataset.#{name}"]
|
59
|
+
}
|
60
|
+
}
|
61
|
+
end
|
62
|
+
|
63
|
+
def routine_config
|
64
|
+
return nil if @vectors.empty?
|
65
|
+
|
66
|
+
routines = @vectors.filter_map { |vector| build_routine(vector) }
|
67
|
+
routines.empty? ? nil : routines.to_h
|
68
|
+
end
|
69
|
+
|
70
|
+
def build_routine(vector)
|
71
|
+
return nil unless vector["merge"]&.fetch("source", nil)
|
72
|
+
|
73
|
+
routine_name = "merge_#{vector["name"].downcase}_dimensions"
|
74
|
+
[routine_name, routine_attributes(routine_name, vector)]
|
75
|
+
end
|
76
|
+
|
77
|
+
def routine_attributes(routine_name, vector)
|
78
|
+
{
|
79
|
+
"dataset_id" => name,
|
80
|
+
"project" => "${var.project_id}",
|
81
|
+
"routine_id" => routine_name,
|
82
|
+
"routine_type" => "PROCEDURE",
|
83
|
+
"language" => "SQL",
|
84
|
+
"definition_body" => merge_routine_definition(vector),
|
85
|
+
"depends_on" => ["google_bigquery_dataset.#{name}"]
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
89
|
+
def merge_routine_definition(vector)
|
90
|
+
source_sql = read_source_sql(vector["merge"]["source"])
|
91
|
+
<<~SQL
|
92
|
+
MERGE #{name}.Dimensions AS TARGET
|
93
|
+
USING (
|
94
|
+
#{source_sql}
|
95
|
+
) AS source
|
96
|
+
ON source.id = target.id
|
97
|
+
WHEN MATCHED THEN UPDATE SET target.#{vector["name"].downcase} = source.dimensions
|
98
|
+
WHEN NOT MATCHED THEN INSERT ROW;
|
99
|
+
SQL
|
100
|
+
end
|
101
|
+
|
102
|
+
def read_source_sql(source_path)
|
103
|
+
File.read(Pathname.pwd.join(source_path))
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
data/lib/manifold/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: manifold-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- claytongentry
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-02-04 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: thor
|
@@ -24,7 +23,6 @@ dependencies:
|
|
24
23
|
- - ">="
|
25
24
|
- !ruby/object:Gem::Version
|
26
25
|
version: '0'
|
27
|
-
description:
|
28
26
|
email:
|
29
27
|
- clayton@bustle.com
|
30
28
|
executables:
|
@@ -51,6 +49,9 @@ files:
|
|
51
49
|
- lib/manifold/services/vector_service.rb
|
52
50
|
- lib/manifold/templates/vector_template.yml
|
53
51
|
- lib/manifold/templates/workspace_template.yml
|
52
|
+
- lib/manifold/terraform/configuration.rb
|
53
|
+
- lib/manifold/terraform/project_configuration.rb
|
54
|
+
- lib/manifold/terraform/workspace_configuration.rb
|
54
55
|
- lib/manifold/version.rb
|
55
56
|
- sig/manifold.rbs
|
56
57
|
homepage: https://github.com/bustle/manifold
|
@@ -61,7 +62,6 @@ metadata:
|
|
61
62
|
source_code_uri: https://github.com/bustle/manifold
|
62
63
|
changelog_uri: https://github.com/bustle/manifold/CHANGELOG.md
|
63
64
|
rubygems_mfa_required: 'true'
|
64
|
-
post_install_message:
|
65
65
|
rdoc_options: []
|
66
66
|
require_paths:
|
67
67
|
- lib
|
@@ -69,15 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
69
|
requirements:
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: 3.
|
72
|
+
version: 3.2.0
|
73
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - ">="
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
80
|
-
signing_key:
|
79
|
+
rubygems_version: 3.6.2
|
81
80
|
specification_version: 4
|
82
81
|
summary: A CLI for managing data infrastructures in BigQuery
|
83
82
|
test_files: []
|