manifold-cli 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/README.md +23 -1
- data/lib/manifold/api/project.rb +8 -2
- data/lib/manifold/api/workspace.rb +15 -1
- data/lib/manifold/cli.rb +5 -3
- data/lib/manifold/services/vector_service.rb +10 -0
- data/lib/manifold/templates/vector_template.yml +4 -0
- data/lib/manifold/terraform/configuration.rb +16 -0
- data/lib/manifold/terraform/project_configuration.rb +66 -0
- data/lib/manifold/terraform/workspace_configuration.rb +107 -0
- data/lib/manifold/version.rb +1 -1
- metadata +7 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1eb276f49eed54c682bf10a9edc53c567a4e61c1dc2c7ee1b27e98596c91d81f
|
4
|
+
data.tar.gz: 3e9374eeb7c36b5a5cd99b17a1d168b363ee161c23ffcae3ec54a170401ae34f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d1760a9ee8d1f2a8c78521761168696e50d87d6ca1194feb68f6a4c8a552d112624ba9c5512226f042bacedee1ae490a55f2e8ec33179e2cc588e26eb8f8e302
|
7
|
+
data.tar.gz: 57ff9863d1ce37d21ac58cdc8b634449754106a2472a3d1bfbebd6332d302b1ec39377ffb5dad882091df5b500bbb654c0ea385e2a60190486f903701a4d8035
|
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -61,7 +61,29 @@ manifold add <data_project_name>
|
|
61
61
|
After you fill out the manifold.yml file, this command generates the necessary BigQuery schema files based on the specified dimensions and metrics.
|
62
62
|
|
63
63
|
```bash
|
64
|
-
manifold generate
|
64
|
+
manifold generate
|
65
|
+
```
|
66
|
+
|
67
|
+
4. **Generate Terraform Configuration (Optional)**
|
68
|
+
|
69
|
+
Manifold can optionally generate Terraform configurations for managing your BigQuery resources. To generate both BigQuery schemas and Terraform configurations, use the `--tf` flag:
|
70
|
+
|
71
|
+
```bash
|
72
|
+
manifold generate --tf
|
73
|
+
```
|
74
|
+
|
75
|
+
This will create:
|
76
|
+
|
77
|
+
- A root `main.tf.json` file that sets up the Google Cloud provider and workspace modules
|
78
|
+
- Individual workspace configurations in `workspaces/<workspace_name>/main.tf.json`
|
79
|
+
- Dataset and table definitions that reference your generated BigQuery schemas
|
80
|
+
|
81
|
+
The generated Terraform configurations use the Google Cloud provider and expect a `project_id` variable to be set. You can apply these configurations using standard Terraform commands:
|
82
|
+
|
83
|
+
```bash
|
84
|
+
terraform init
|
85
|
+
terraform plan -var="project_id=your-project-id"
|
86
|
+
terraform apply -var="project_id=your-project-id"
|
65
87
|
```
|
66
88
|
|
67
89
|
## Manifold Configuration
|
data/lib/manifold/api/project.rb
CHANGED
@@ -22,8 +22,9 @@ module Manifold
|
|
22
22
|
@workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger:) }
|
23
23
|
end
|
24
24
|
|
25
|
-
def generate
|
26
|
-
workspaces.each(
|
25
|
+
def generate(with_terraform: false)
|
26
|
+
workspaces.each { |w| w.generate(with_terraform:) }
|
27
|
+
generate_terraform_entrypoint if with_terraform
|
27
28
|
end
|
28
29
|
|
29
30
|
def workspaces_directory
|
@@ -39,6 +40,11 @@ module Manifold
|
|
39
40
|
def workspace_directories
|
40
41
|
workspaces_directory.children.select(&:directory?)
|
41
42
|
end
|
43
|
+
|
44
|
+
def generate_terraform_entrypoint
|
45
|
+
config = Terraform::ProjectConfiguration.new(workspaces)
|
46
|
+
config.write(directory.join("main.tf.json"))
|
47
|
+
end
|
42
48
|
end
|
43
49
|
end
|
44
50
|
end
|
@@ -26,10 +26,11 @@ module Manifold
|
|
26
26
|
FileUtils.cp(template_path, manifold_path)
|
27
27
|
end
|
28
28
|
|
29
|
-
def generate
|
29
|
+
def generate(with_terraform: false)
|
30
30
|
return unless manifold_exists? && any_vectors?
|
31
31
|
|
32
32
|
generate_dimensions
|
33
|
+
generate_terraform if with_terraform
|
33
34
|
logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
|
34
35
|
end
|
35
36
|
|
@@ -55,6 +56,10 @@ module Manifold
|
|
55
56
|
directory.join("manifold.yml")
|
56
57
|
end
|
57
58
|
|
59
|
+
def terraform_main_path
|
60
|
+
directory.join("main.tf.json")
|
61
|
+
end
|
62
|
+
|
58
63
|
private
|
59
64
|
|
60
65
|
def directory
|
@@ -99,6 +104,15 @@ module Manifold
|
|
99
104
|
def vectors
|
100
105
|
manifold_yaml["vectors"]
|
101
106
|
end
|
107
|
+
|
108
|
+
def generate_terraform
|
109
|
+
config = Terraform::WorkspaceConfiguration.new(name)
|
110
|
+
vectors.each do |vector|
|
111
|
+
vector_config = @vector_service.load_vector_config(vector)
|
112
|
+
config.add_vector(vector_config)
|
113
|
+
end
|
114
|
+
config.write(terraform_main_path)
|
115
|
+
end
|
102
116
|
end
|
103
117
|
end
|
104
118
|
end
|
data/lib/manifold/cli.rb
CHANGED
@@ -46,10 +46,12 @@ module Manifold
|
|
46
46
|
end
|
47
47
|
|
48
48
|
desc "generate", "Generate BigQuery schema for all workspaces in the project"
|
49
|
+
method_option :tf, type: :boolean, desc: "Generate Terraform configurations"
|
49
50
|
def generate
|
50
|
-
|
51
|
-
|
52
|
-
project.
|
51
|
+
path = Pathname.pwd
|
52
|
+
name = path.basename.to_s
|
53
|
+
project = API::Project.new(name, directory: path, logger:)
|
54
|
+
project.generate(with_terraform: options[:tf])
|
53
55
|
logger.info "Generated BigQuery schema for all workspaces in the project."
|
54
56
|
end
|
55
57
|
end
|
@@ -19,6 +19,16 @@ module Manifold
|
|
19
19
|
raise "Invalid YAML in vector configuration #{path}: #{e.message}"
|
20
20
|
end
|
21
21
|
|
22
|
+
def load_vector_config(vector_name)
|
23
|
+
path = config_path(vector_name)
|
24
|
+
config = YAML.safe_load_file(path)
|
25
|
+
config.merge("name" => vector_name.downcase)
|
26
|
+
rescue Errno::ENOENT, Errno::EISDIR
|
27
|
+
raise "Vector configuration not found: #{path}"
|
28
|
+
rescue Psych::Exception => e
|
29
|
+
raise "Invalid YAML in vector configuration #{path}: #{e.message}"
|
30
|
+
end
|
31
|
+
|
22
32
|
private
|
23
33
|
|
24
34
|
def transform_attributes_to_schema(attributes)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module Terraform
|
5
|
+
# Provides a base class for Terraform configuration files.
|
6
|
+
class Configuration
|
7
|
+
def as_json
|
8
|
+
raise NotImplementedError, "#{self.class} must implement #as_json"
|
9
|
+
end
|
10
|
+
|
11
|
+
def write(path)
|
12
|
+
path.write("#{JSON.pretty_generate(as_json)}\n")
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module Terraform
|
5
|
+
# Represents a Terraform configuration for a Manifold project.
|
6
|
+
class ProjectConfiguration < Configuration
|
7
|
+
attr_reader :workspaces, :provider_version
|
8
|
+
|
9
|
+
DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION = "6.18.1"
|
10
|
+
|
11
|
+
def initialize(workspaces, provider_version: DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION)
|
12
|
+
super()
|
13
|
+
@workspaces = workspaces
|
14
|
+
@provider_version = provider_version
|
15
|
+
end
|
16
|
+
|
17
|
+
def as_json
|
18
|
+
{
|
19
|
+
"terraform" => terraform_block,
|
20
|
+
"provider" => provider_block,
|
21
|
+
"variable" => variables_block,
|
22
|
+
"module" => workspace_modules
|
23
|
+
}
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def terraform_block
|
29
|
+
{
|
30
|
+
"required_providers" => {
|
31
|
+
"google" => {
|
32
|
+
"source" => "hashicorp/google",
|
33
|
+
"version" => provider_version
|
34
|
+
}
|
35
|
+
}
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
def provider_block
|
40
|
+
{
|
41
|
+
"google" => {
|
42
|
+
"project" => "${var.project_id}"
|
43
|
+
}
|
44
|
+
}
|
45
|
+
end
|
46
|
+
|
47
|
+
def variables_block
|
48
|
+
{
|
49
|
+
"project_id" => {
|
50
|
+
"description" => "The GCP project ID where resources will be created",
|
51
|
+
"type" => "string"
|
52
|
+
}
|
53
|
+
}
|
54
|
+
end
|
55
|
+
|
56
|
+
def workspace_modules
|
57
|
+
workspaces.each_with_object({}) do |workspace, modules|
|
58
|
+
modules[workspace.name] = {
|
59
|
+
"source" => "./workspaces/#{workspace.name}",
|
60
|
+
"project_id" => "${var.project_id}"
|
61
|
+
}
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module Terraform
|
5
|
+
# Represents a Terraform configuration for a Manifold workspace.
|
6
|
+
class WorkspaceConfiguration < Configuration
|
7
|
+
attr_reader :name
|
8
|
+
|
9
|
+
def initialize(name)
|
10
|
+
super()
|
11
|
+
@name = name
|
12
|
+
@vectors = []
|
13
|
+
end
|
14
|
+
|
15
|
+
def add_vector(vector_config)
|
16
|
+
@vectors << vector_config
|
17
|
+
end
|
18
|
+
|
19
|
+
def as_json
|
20
|
+
{
|
21
|
+
"variable" => variables_block,
|
22
|
+
"resource" => {
|
23
|
+
"google_bigquery_dataset" => dataset_config,
|
24
|
+
"google_bigquery_table" => table_config,
|
25
|
+
"google_bigquery_routine" => routine_config
|
26
|
+
}.compact
|
27
|
+
}
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def variables_block
|
33
|
+
{
|
34
|
+
"project_id" => {
|
35
|
+
"description" => "The GCP project ID where resources will be created",
|
36
|
+
"type" => "string"
|
37
|
+
}
|
38
|
+
}
|
39
|
+
end
|
40
|
+
|
41
|
+
def dataset_config
|
42
|
+
{
|
43
|
+
name => {
|
44
|
+
"dataset_id" => name,
|
45
|
+
"project" => "${var.project_id}",
|
46
|
+
"location" => "US"
|
47
|
+
}
|
48
|
+
}
|
49
|
+
end
|
50
|
+
|
51
|
+
def table_config
|
52
|
+
{
|
53
|
+
"dimensions" => {
|
54
|
+
"dataset_id" => name,
|
55
|
+
"project" => "${var.project_id}",
|
56
|
+
"table_id" => "Dimensions",
|
57
|
+
"schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
|
58
|
+
"depends_on" => ["google_bigquery_dataset.#{name}"]
|
59
|
+
}
|
60
|
+
}
|
61
|
+
end
|
62
|
+
|
63
|
+
def routine_config
|
64
|
+
return nil if @vectors.empty?
|
65
|
+
|
66
|
+
routines = @vectors.filter_map { |vector| build_routine(vector) }
|
67
|
+
routines.empty? ? nil : routines.to_h
|
68
|
+
end
|
69
|
+
|
70
|
+
def build_routine(vector)
|
71
|
+
return nil unless vector["merge"]&.fetch("source", nil)
|
72
|
+
|
73
|
+
routine_name = "merge_#{vector["name"].downcase}_dimensions"
|
74
|
+
[routine_name, routine_attributes(routine_name, vector)]
|
75
|
+
end
|
76
|
+
|
77
|
+
def routine_attributes(routine_name, vector)
|
78
|
+
{
|
79
|
+
"dataset_id" => name,
|
80
|
+
"project" => "${var.project_id}",
|
81
|
+
"routine_id" => routine_name,
|
82
|
+
"routine_type" => "PROCEDURE",
|
83
|
+
"language" => "SQL",
|
84
|
+
"definition_body" => merge_routine_definition(vector),
|
85
|
+
"depends_on" => ["google_bigquery_dataset.#{name}"]
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
89
|
+
def merge_routine_definition(vector)
|
90
|
+
source_sql = read_source_sql(vector["merge"]["source"])
|
91
|
+
<<~SQL
|
92
|
+
MERGE #{name}.Dimensions AS TARGET
|
93
|
+
USING (
|
94
|
+
#{source_sql}
|
95
|
+
) AS source
|
96
|
+
ON source.id = target.id
|
97
|
+
WHEN MATCHED THEN UPDATE SET target.#{vector["name"].downcase} = source.dimensions
|
98
|
+
WHEN NOT MATCHED THEN INSERT ROW;
|
99
|
+
SQL
|
100
|
+
end
|
101
|
+
|
102
|
+
def read_source_sql(source_path)
|
103
|
+
File.read(Pathname.pwd.join(source_path))
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
data/lib/manifold/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: manifold-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- claytongentry
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-02-04 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: thor
|
@@ -24,7 +23,6 @@ dependencies:
|
|
24
23
|
- - ">="
|
25
24
|
- !ruby/object:Gem::Version
|
26
25
|
version: '0'
|
27
|
-
description:
|
28
26
|
email:
|
29
27
|
- clayton@bustle.com
|
30
28
|
executables:
|
@@ -51,6 +49,9 @@ files:
|
|
51
49
|
- lib/manifold/services/vector_service.rb
|
52
50
|
- lib/manifold/templates/vector_template.yml
|
53
51
|
- lib/manifold/templates/workspace_template.yml
|
52
|
+
- lib/manifold/terraform/configuration.rb
|
53
|
+
- lib/manifold/terraform/project_configuration.rb
|
54
|
+
- lib/manifold/terraform/workspace_configuration.rb
|
54
55
|
- lib/manifold/version.rb
|
55
56
|
- sig/manifold.rbs
|
56
57
|
homepage: https://github.com/bustle/manifold
|
@@ -61,7 +62,6 @@ metadata:
|
|
61
62
|
source_code_uri: https://github.com/bustle/manifold
|
62
63
|
changelog_uri: https://github.com/bustle/manifold/CHANGELOG.md
|
63
64
|
rubygems_mfa_required: 'true'
|
64
|
-
post_install_message:
|
65
65
|
rdoc_options: []
|
66
66
|
require_paths:
|
67
67
|
- lib
|
@@ -69,15 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
69
|
requirements:
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: 3.
|
72
|
+
version: 3.2.0
|
73
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - ">="
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
80
|
-
signing_key:
|
79
|
+
rubygems_version: 3.6.2
|
81
80
|
specification_version: 4
|
82
81
|
summary: A CLI for managing data infrastructures in BigQuery
|
83
82
|
test_files: []
|