manifold-cli 0.0.7 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -1
- data/README.md +23 -1
- data/bin/manifold +1 -1
- data/lib/manifold/api/project.rb +50 -0
- data/lib/manifold/api/vector.rb +34 -0
- data/lib/manifold/api/workspace.rb +118 -0
- data/lib/manifold/cli.rb +13 -15
- data/lib/manifold/services/vector_service.rb +14 -5
- data/lib/manifold/templates/vector_template.yml +4 -0
- data/lib/manifold/terraform/configuration.rb +16 -0
- data/lib/manifold/terraform/project_configuration.rb +66 -0
- data/lib/manifold/terraform/workspace_configuration.rb +107 -0
- data/lib/manifold/version.rb +1 -1
- data/lib/manifold.rb +3 -1
- metadata +11 -12
- data/lib/manifold/project/project.rb +0 -33
- data/lib/manifold/project/vector.rb +0 -37
- data/lib/manifold/project/workspace.rb +0 -51
- data/lib/manifold/services/big_query_service.rb +0 -61
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1eb276f49eed54c682bf10a9edc53c567a4e61c1dc2c7ee1b27e98596c91d81f
|
4
|
+
data.tar.gz: 3e9374eeb7c36b5a5cd99b17a1d168b363ee161c23ffcae3ec54a170401ae34f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d1760a9ee8d1f2a8c78521761168696e50d87d6ca1194feb68f6a4c8a552d112624ba9c5512226f042bacedee1ae490a55f2e8ec33179e2cc588e26eb8f8e302
|
7
|
+
data.tar.gz: 57ff9863d1ce37d21ac58cdc8b634449754106a2472a3d1bfbebd6332d302b1ec39377ffb5dad882091df5b500bbb654c0ea385e2a60190486f903701a4d8035
|
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -61,7 +61,29 @@ manifold add <data_project_name>
|
|
61
61
|
After you fill out the manifold.yml file, this command generates the necessary BigQuery schema files based on the specified dimensions and metrics.
|
62
62
|
|
63
63
|
```bash
|
64
|
-
manifold generate
|
64
|
+
manifold generate
|
65
|
+
```
|
66
|
+
|
67
|
+
4. **Generate Terraform Configuration (Optional)**
|
68
|
+
|
69
|
+
Manifold can optionally generate Terraform configurations for managing your BigQuery resources. To generate both BigQuery schemas and Terraform configurations, use the `--tf` flag:
|
70
|
+
|
71
|
+
```bash
|
72
|
+
manifold generate --tf
|
73
|
+
```
|
74
|
+
|
75
|
+
This will create:
|
76
|
+
|
77
|
+
- A root `main.tf.json` file that sets up the Google Cloud provider and workspace modules
|
78
|
+
- Individual workspace configurations in `workspaces/<workspace_name>/main.tf.json`
|
79
|
+
- Dataset and table definitions that reference your generated BigQuery schemas
|
80
|
+
|
81
|
+
The generated Terraform configurations use the Google Cloud provider and expect a `project_id` variable to be set. You can apply these configurations using standard Terraform commands:
|
82
|
+
|
83
|
+
```bash
|
84
|
+
terraform init
|
85
|
+
terraform plan -var="project_id=your-project-id"
|
86
|
+
terraform apply -var="project_id=your-project-id"
|
65
87
|
```
|
66
88
|
|
67
89
|
## Manifold Configuration
|
data/bin/manifold
CHANGED
data/lib/manifold/api/project.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module API
|
5
|
+
# Projects API
|
6
|
+
class Project
|
7
|
+
attr_reader :name, :logger, :directory
|
8
|
+
|
9
|
+
def initialize(name, logger: Logger.new($stdout), directory: Pathname.pwd.join(name))
|
10
|
+
@name = name
|
11
|
+
@logger = logger
|
12
|
+
@directory = Pathname(directory)
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.create(name, directory: Pathname.pwd.join(name))
|
16
|
+
new(name, directory:).tap do |project|
|
17
|
+
[project.workspaces_directory, project.vectors_directory].each(&:mkpath)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def workspaces
|
22
|
+
@workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger:) }
|
23
|
+
end
|
24
|
+
|
25
|
+
def generate(with_terraform: false)
|
26
|
+
workspaces.each { |w| w.generate(with_terraform:) }
|
27
|
+
generate_terraform_entrypoint if with_terraform
|
28
|
+
end
|
29
|
+
|
30
|
+
def workspaces_directory
|
31
|
+
directory.join("workspaces")
|
32
|
+
end
|
33
|
+
|
34
|
+
def vectors_directory
|
35
|
+
directory.join("vectors")
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def workspace_directories
|
41
|
+
workspaces_directory.children.select(&:directory?)
|
42
|
+
end
|
43
|
+
|
44
|
+
def generate_terraform_entrypoint
|
45
|
+
config = Terraform::ProjectConfiguration.new(workspaces)
|
46
|
+
config.write(directory.join("main.tf.json"))
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/lib/manifold/api/vector.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module API
|
5
|
+
# Describes the entities for whom metrics are calculated.
|
6
|
+
class Vector
|
7
|
+
attr_reader :name, :template_path
|
8
|
+
|
9
|
+
DEFAULT_TEMPLATE_PATH = File.expand_path(
|
10
|
+
"../templates/vector_template.yml", __dir__
|
11
|
+
).freeze
|
12
|
+
|
13
|
+
def initialize(name, template_path: DEFAULT_TEMPLATE_PATH)
|
14
|
+
@name = name
|
15
|
+
@template_path = Pathname(template_path)
|
16
|
+
end
|
17
|
+
|
18
|
+
def add
|
19
|
+
directory.mkpath
|
20
|
+
FileUtils.cp(template_path, config_path)
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def directory
|
26
|
+
Pathname.pwd.join("vectors")
|
27
|
+
end
|
28
|
+
|
29
|
+
def config_path
|
30
|
+
directory.join("#{name.downcase}.yml")
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/manifold/api/workspace.rb
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module API
|
5
|
+
# Encapsulates a single manifold.
|
6
|
+
class Workspace
|
7
|
+
attr_reader :name, :template_path, :logger
|
8
|
+
|
9
|
+
DEFAULT_TEMPLATE_PATH = File.expand_path(
|
10
|
+
"../templates/workspace_template.yml", __dir__
|
11
|
+
).freeze
|
12
|
+
|
13
|
+
def initialize(name, template_path: DEFAULT_TEMPLATE_PATH, logger: Logger.new($stdout))
|
14
|
+
@name = name
|
15
|
+
@template_path = template_path
|
16
|
+
@logger = logger
|
17
|
+
@vector_service = Services::VectorService.new(logger)
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.from_directory(directory, logger: Logger.new($stdout))
|
21
|
+
new(directory.basename.to_s, logger:)
|
22
|
+
end
|
23
|
+
|
24
|
+
def add
|
25
|
+
[tables_directory, routines_directory].each(&:mkpath)
|
26
|
+
FileUtils.cp(template_path, manifold_path)
|
27
|
+
end
|
28
|
+
|
29
|
+
def generate(with_terraform: false)
|
30
|
+
return unless manifold_exists? && any_vectors?
|
31
|
+
|
32
|
+
generate_dimensions
|
33
|
+
generate_terraform if with_terraform
|
34
|
+
logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
|
35
|
+
end
|
36
|
+
|
37
|
+
def tables_directory
|
38
|
+
directory.join("tables")
|
39
|
+
end
|
40
|
+
|
41
|
+
def routines_directory
|
42
|
+
directory.join("routines")
|
43
|
+
end
|
44
|
+
|
45
|
+
def manifold_file
|
46
|
+
return nil unless manifold_exists?
|
47
|
+
|
48
|
+
File.new(manifold_path)
|
49
|
+
end
|
50
|
+
|
51
|
+
def manifold_exists?
|
52
|
+
manifold_path.file?
|
53
|
+
end
|
54
|
+
|
55
|
+
def manifold_path
|
56
|
+
directory.join("manifold.yml")
|
57
|
+
end
|
58
|
+
|
59
|
+
def terraform_main_path
|
60
|
+
directory.join("main.tf.json")
|
61
|
+
end
|
62
|
+
|
63
|
+
private
|
64
|
+
|
65
|
+
def directory
|
66
|
+
Pathname.pwd.join("workspaces", name)
|
67
|
+
end
|
68
|
+
|
69
|
+
def manifold_yaml
|
70
|
+
@manifold_yaml ||= YAML.safe_load_file(manifold_path)
|
71
|
+
end
|
72
|
+
|
73
|
+
def generate_dimensions
|
74
|
+
dimensions_path.write(dimensions_schema_json.concat("\n"))
|
75
|
+
end
|
76
|
+
|
77
|
+
def dimensions_schema
|
78
|
+
[
|
79
|
+
{ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
|
80
|
+
{ "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
|
81
|
+
"fields" => dimensions_fields }
|
82
|
+
]
|
83
|
+
end
|
84
|
+
|
85
|
+
def dimensions_fields
|
86
|
+
vectors.filter_map do |vector|
|
87
|
+
logger.info("Loading vector schema for '#{vector}'.")
|
88
|
+
@vector_service.load_vector_schema(vector)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
def dimensions_schema_json
|
93
|
+
JSON.pretty_generate(dimensions_schema)
|
94
|
+
end
|
95
|
+
|
96
|
+
def dimensions_path
|
97
|
+
tables_directory.join("dimensions.json")
|
98
|
+
end
|
99
|
+
|
100
|
+
def any_vectors?
|
101
|
+
!(vectors.nil? || vectors.empty?)
|
102
|
+
end
|
103
|
+
|
104
|
+
def vectors
|
105
|
+
manifold_yaml["vectors"]
|
106
|
+
end
|
107
|
+
|
108
|
+
def generate_terraform
|
109
|
+
config = Terraform::WorkspaceConfiguration.new(name)
|
110
|
+
vectors.each do |vector|
|
111
|
+
vector_config = @vector_service.load_vector_config(vector)
|
112
|
+
config.add_vector(vector_config)
|
113
|
+
end
|
114
|
+
config.write(terraform_main_path)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
data/lib/manifold/cli.rb
CHANGED
@@ -11,14 +11,12 @@ module Manifold
|
|
11
11
|
|
12
12
|
self.logger = logger
|
13
13
|
logger.level = Logger::INFO
|
14
|
-
|
15
|
-
self.bq_service = Services::BigQueryService.new(logger)
|
16
14
|
end
|
17
15
|
|
18
16
|
desc "init NAME", "Generate a new umbrella project for data management"
|
19
17
|
def init(name)
|
20
18
|
Manifold::API::Project.create(name)
|
21
|
-
logger.info "Created umbrella project '#{name}' with
|
19
|
+
logger.info "Created umbrella project '#{name}' with workspaces and vectors directories."
|
22
20
|
end
|
23
21
|
|
24
22
|
desc "vectors SUBCOMMAND ...ARGS", "Manage vectors"
|
@@ -33,28 +31,28 @@ module Manifold
|
|
33
31
|
end
|
34
32
|
|
35
33
|
desc "add VECTOR_NAME", "Add a new vector configuration"
|
36
|
-
def add(name
|
37
|
-
vector = API::Vector.new(name
|
34
|
+
def add(name)
|
35
|
+
vector = API::Vector.new(name)
|
38
36
|
vector.add
|
39
37
|
logger.info "Created vector configuration for '#{name}'."
|
40
38
|
end
|
41
39
|
}
|
42
40
|
|
43
41
|
desc "add WORKSPACE_NAME", "Add a new workspace to a project"
|
44
|
-
def add(name
|
45
|
-
workspace = API::Workspace.new(name
|
42
|
+
def add(name)
|
43
|
+
workspace = API::Workspace.new(name)
|
46
44
|
workspace.add
|
47
45
|
logger.info "Added workspace '#{name}' with tables and routines directories."
|
48
46
|
end
|
49
47
|
|
50
|
-
desc "generate
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
48
|
+
desc "generate", "Generate BigQuery schema for all workspaces in the project"
|
49
|
+
method_option :tf, type: :boolean, desc: "Generate Terraform configurations"
|
50
|
+
def generate
|
51
|
+
path = Pathname.pwd
|
52
|
+
name = path.basename.to_s
|
53
|
+
project = API::Project.new(name, directory: path, logger:)
|
54
|
+
project.generate(with_terraform: options[:tf])
|
55
|
+
logger.info "Generated BigQuery schema for all workspaces in the project."
|
58
56
|
end
|
59
57
|
end
|
60
58
|
end
|
data/lib/manifold/services/vector_service.rb
CHANGED
@@ -10,14 +10,23 @@ module Manifold
|
|
10
10
|
|
11
11
|
def load_vector_schema(vector_name)
|
12
12
|
path = config_path(vector_name)
|
13
|
-
unless path.file?
|
14
|
-
@logger.error("Vector configuration not found: #{path}")
|
15
|
-
return nil
|
16
|
-
end
|
17
|
-
|
18
13
|
config = YAML.safe_load_file(path)
|
19
14
|
fields = transform_attributes_to_schema(config["attributes"])
|
20
15
|
{ "name" => vector_name.downcase, "type" => "RECORD", "fields" => fields }
|
16
|
+
rescue Errno::ENOENT, Errno::EISDIR
|
17
|
+
raise "Vector configuration not found: #{path}"
|
18
|
+
rescue Psych::Exception => e
|
19
|
+
raise "Invalid YAML in vector configuration #{path}: #{e.message}"
|
20
|
+
end
|
21
|
+
|
22
|
+
def load_vector_config(vector_name)
|
23
|
+
path = config_path(vector_name)
|
24
|
+
config = YAML.safe_load_file(path)
|
25
|
+
config.merge("name" => vector_name.downcase)
|
26
|
+
rescue Errno::ENOENT, Errno::EISDIR
|
27
|
+
raise "Vector configuration not found: #{path}"
|
28
|
+
rescue Psych::Exception => e
|
29
|
+
raise "Invalid YAML in vector configuration #{path}: #{e.message}"
|
21
30
|
end
|
22
31
|
|
23
32
|
private
|
data/lib/manifold/terraform/configuration.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module Terraform
|
5
|
+
# Provides a base class for Terraform configuration files.
|
6
|
+
class Configuration
|
7
|
+
def as_json
|
8
|
+
raise NotImplementedError, "#{self.class} must implement #as_json"
|
9
|
+
end
|
10
|
+
|
11
|
+
def write(path)
|
12
|
+
path.write("#{JSON.pretty_generate(as_json)}\n")
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/manifold/terraform/project_configuration.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module Terraform
|
5
|
+
# Represents a Terraform configuration for a Manifold project.
|
6
|
+
class ProjectConfiguration < Configuration
|
7
|
+
attr_reader :workspaces, :provider_version
|
8
|
+
|
9
|
+
DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION = "6.18.1"
|
10
|
+
|
11
|
+
def initialize(workspaces, provider_version: DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION)
|
12
|
+
super()
|
13
|
+
@workspaces = workspaces
|
14
|
+
@provider_version = provider_version
|
15
|
+
end
|
16
|
+
|
17
|
+
def as_json
|
18
|
+
{
|
19
|
+
"terraform" => terraform_block,
|
20
|
+
"provider" => provider_block,
|
21
|
+
"variable" => variables_block,
|
22
|
+
"module" => workspace_modules
|
23
|
+
}
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def terraform_block
|
29
|
+
{
|
30
|
+
"required_providers" => {
|
31
|
+
"google" => {
|
32
|
+
"source" => "hashicorp/google",
|
33
|
+
"version" => provider_version
|
34
|
+
}
|
35
|
+
}
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
def provider_block
|
40
|
+
{
|
41
|
+
"google" => {
|
42
|
+
"project" => "${var.project_id}"
|
43
|
+
}
|
44
|
+
}
|
45
|
+
end
|
46
|
+
|
47
|
+
def variables_block
|
48
|
+
{
|
49
|
+
"project_id" => {
|
50
|
+
"description" => "The GCP project ID where resources will be created",
|
51
|
+
"type" => "string"
|
52
|
+
}
|
53
|
+
}
|
54
|
+
end
|
55
|
+
|
56
|
+
def workspace_modules
|
57
|
+
workspaces.each_with_object({}) do |workspace, modules|
|
58
|
+
modules[workspace.name] = {
|
59
|
+
"source" => "./workspaces/#{workspace.name}",
|
60
|
+
"project_id" => "${var.project_id}"
|
61
|
+
}
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/lib/manifold/terraform/workspace_configuration.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
|
4
|
+
module Terraform
|
5
|
+
# Represents a Terraform configuration for a Manifold workspace.
|
6
|
+
class WorkspaceConfiguration < Configuration
|
7
|
+
attr_reader :name
|
8
|
+
|
9
|
+
def initialize(name)
|
10
|
+
super()
|
11
|
+
@name = name
|
12
|
+
@vectors = []
|
13
|
+
end
|
14
|
+
|
15
|
+
def add_vector(vector_config)
|
16
|
+
@vectors << vector_config
|
17
|
+
end
|
18
|
+
|
19
|
+
def as_json
|
20
|
+
{
|
21
|
+
"variable" => variables_block,
|
22
|
+
"resource" => {
|
23
|
+
"google_bigquery_dataset" => dataset_config,
|
24
|
+
"google_bigquery_table" => table_config,
|
25
|
+
"google_bigquery_routine" => routine_config
|
26
|
+
}.compact
|
27
|
+
}
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def variables_block
|
33
|
+
{
|
34
|
+
"project_id" => {
|
35
|
+
"description" => "The GCP project ID where resources will be created",
|
36
|
+
"type" => "string"
|
37
|
+
}
|
38
|
+
}
|
39
|
+
end
|
40
|
+
|
41
|
+
def dataset_config
|
42
|
+
{
|
43
|
+
name => {
|
44
|
+
"dataset_id" => name,
|
45
|
+
"project" => "${var.project_id}",
|
46
|
+
"location" => "US"
|
47
|
+
}
|
48
|
+
}
|
49
|
+
end
|
50
|
+
|
51
|
+
def table_config
|
52
|
+
{
|
53
|
+
"dimensions" => {
|
54
|
+
"dataset_id" => name,
|
55
|
+
"project" => "${var.project_id}",
|
56
|
+
"table_id" => "Dimensions",
|
57
|
+
"schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
|
58
|
+
"depends_on" => ["google_bigquery_dataset.#{name}"]
|
59
|
+
}
|
60
|
+
}
|
61
|
+
end
|
62
|
+
|
63
|
+
def routine_config
|
64
|
+
return nil if @vectors.empty?
|
65
|
+
|
66
|
+
routines = @vectors.filter_map { |vector| build_routine(vector) }
|
67
|
+
routines.empty? ? nil : routines.to_h
|
68
|
+
end
|
69
|
+
|
70
|
+
def build_routine(vector)
|
71
|
+
return nil unless vector["merge"]&.fetch("source", nil)
|
72
|
+
|
73
|
+
routine_name = "merge_#{vector["name"].downcase}_dimensions"
|
74
|
+
[routine_name, routine_attributes(routine_name, vector)]
|
75
|
+
end
|
76
|
+
|
77
|
+
def routine_attributes(routine_name, vector)
|
78
|
+
{
|
79
|
+
"dataset_id" => name,
|
80
|
+
"project" => "${var.project_id}",
|
81
|
+
"routine_id" => routine_name,
|
82
|
+
"routine_type" => "PROCEDURE",
|
83
|
+
"language" => "SQL",
|
84
|
+
"definition_body" => merge_routine_definition(vector),
|
85
|
+
"depends_on" => ["google_bigquery_dataset.#{name}"]
|
86
|
+
}
|
87
|
+
end
|
88
|
+
|
89
|
+
def merge_routine_definition(vector)
|
90
|
+
source_sql = read_source_sql(vector["merge"]["source"])
|
91
|
+
<<~SQL
|
92
|
+
MERGE #{name}.Dimensions AS TARGET
|
93
|
+
USING (
|
94
|
+
#{source_sql}
|
95
|
+
) AS source
|
96
|
+
ON source.id = target.id
|
97
|
+
WHEN MATCHED THEN UPDATE SET target.#{vector["name"].downcase} = source.dimensions
|
98
|
+
WHEN NOT MATCHED THEN INSERT ROW;
|
99
|
+
SQL
|
100
|
+
end
|
101
|
+
|
102
|
+
def read_source_sql(source_path)
|
103
|
+
File.read(Pathname.pwd.join(source_path))
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
data/lib/manifold/version.rb
CHANGED
data/lib/manifold.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "json"
|
4
|
+
require "logger"
|
3
5
|
require "pathname"
|
4
6
|
require "thor"
|
5
7
|
require "yaml"
|
6
8
|
|
7
|
-
Dir[File.join(__dir__, "manifold", "**", "*.rb")].
|
9
|
+
Dir[File.join(__dir__, "manifold", "**", "*.rb")].each do |file|
|
8
10
|
require file
|
9
11
|
end
|
10
12
|
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: manifold-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- claytongentry
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-02-04 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: thor
|
@@ -24,7 +23,6 @@ dependencies:
|
|
24
23
|
- - ">="
|
25
24
|
- !ruby/object:Gem::Version
|
26
25
|
version: '0'
|
27
|
-
description:
|
28
26
|
email:
|
29
27
|
- clayton@bustle.com
|
30
28
|
executables:
|
@@ -44,14 +42,16 @@ files:
|
|
44
42
|
- docs/CONTRIBUTING.md
|
45
43
|
- lib/manifold.rb
|
46
44
|
- lib/manifold/api.rb
|
45
|
+
- lib/manifold/api/project.rb
|
46
|
+
- lib/manifold/api/vector.rb
|
47
|
+
- lib/manifold/api/workspace.rb
|
47
48
|
- lib/manifold/cli.rb
|
48
|
-
- lib/manifold/project/project.rb
|
49
|
-
- lib/manifold/project/vector.rb
|
50
|
-
- lib/manifold/project/workspace.rb
|
51
|
-
- lib/manifold/services/big_query_service.rb
|
52
49
|
- lib/manifold/services/vector_service.rb
|
53
50
|
- lib/manifold/templates/vector_template.yml
|
54
51
|
- lib/manifold/templates/workspace_template.yml
|
52
|
+
- lib/manifold/terraform/configuration.rb
|
53
|
+
- lib/manifold/terraform/project_configuration.rb
|
54
|
+
- lib/manifold/terraform/workspace_configuration.rb
|
55
55
|
- lib/manifold/version.rb
|
56
56
|
- sig/manifold.rbs
|
57
57
|
homepage: https://github.com/bustle/manifold
|
@@ -61,7 +61,7 @@ metadata:
|
|
61
61
|
homepage_uri: https://github.com/bustle/manifold
|
62
62
|
source_code_uri: https://github.com/bustle/manifold
|
63
63
|
changelog_uri: https://github.com/bustle/manifold/CHANGELOG.md
|
64
|
-
|
64
|
+
rubygems_mfa_required: 'true'
|
65
65
|
rdoc_options: []
|
66
66
|
require_paths:
|
67
67
|
- lib
|
@@ -69,15 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
69
|
requirements:
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: 3.
|
72
|
+
version: 3.2.0
|
73
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
74
|
requirements:
|
75
75
|
- - ">="
|
76
76
|
- !ruby/object:Gem::Version
|
77
77
|
version: '0'
|
78
78
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
80
|
-
signing_key:
|
79
|
+
rubygems_version: 3.6.2
|
81
80
|
specification_version: 4
|
82
81
|
summary: A CLI for managing data infrastructures in BigQuery
|
83
82
|
test_files: []
|
data/lib/manifold/project/project.rb
REMOVED
@@ -1,33 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Manifold
|
4
|
-
module API
|
5
|
-
# Projects API
|
6
|
-
class Project
|
7
|
-
attr_reader :name, :directory
|
8
|
-
|
9
|
-
def initialize(name, directory: Pathname.pwd.join(name))
|
10
|
-
self.name = name
|
11
|
-
self.directory = Pathname(directory)
|
12
|
-
end
|
13
|
-
|
14
|
-
def self.create(name, directory: Pathname.pwd.join(name))
|
15
|
-
new(name, directory: directory).tap do |project|
|
16
|
-
[project.workspaces_directory, project.vectors_directory].each(&:mkpath)
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def workspaces_directory
|
21
|
-
directory.join("workspaces")
|
22
|
-
end
|
23
|
-
|
24
|
-
def vectors_directory
|
25
|
-
directory.join("vectors")
|
26
|
-
end
|
27
|
-
|
28
|
-
private
|
29
|
-
|
30
|
-
attr_writer :name, :directory
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
data/lib/manifold/project/vector.rb
REMOVED
@@ -1,37 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Manifold
|
4
|
-
module API
|
5
|
-
# Describes the entities for whom metrics are calculated.
|
6
|
-
class Vector
|
7
|
-
attr_reader :name, :project, :template_path
|
8
|
-
|
9
|
-
DEFAULT_TEMPLATE_PATH = Pathname.pwd.join(
|
10
|
-
"lib", "manifold", "templates", "vector_template.yml"
|
11
|
-
).freeze
|
12
|
-
|
13
|
-
def initialize(name, project:, template_path: DEFAULT_TEMPLATE_PATH)
|
14
|
-
self.name = name
|
15
|
-
self.project = project
|
16
|
-
self.template_path = Pathname(template_path)
|
17
|
-
end
|
18
|
-
|
19
|
-
def add
|
20
|
-
directory.mkpath
|
21
|
-
FileUtils.cp(template_path, config_path)
|
22
|
-
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
attr_writer :name, :project, :template_path
|
27
|
-
|
28
|
-
def directory
|
29
|
-
project.directory.join("vectors")
|
30
|
-
end
|
31
|
-
|
32
|
-
def config_path
|
33
|
-
directory.join("#{name.downcase}.yml")
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
data/lib/manifold/project/workspace.rb
REMOVED
@@ -1,51 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Manifold
|
4
|
-
module API
|
5
|
-
# Encapsulates a single manifold.
|
6
|
-
class Workspace
|
7
|
-
attr_reader :name, :project, :template_path
|
8
|
-
|
9
|
-
DEFAULT_TEMPLATE_PATH = Pathname.pwd.join(
|
10
|
-
"lib", "manifold", "templates", "workspace_template.yml"
|
11
|
-
)
|
12
|
-
|
13
|
-
def initialize(name, project:, template_path: DEFAULT_TEMPLATE_PATH)
|
14
|
-
self.name = name
|
15
|
-
self.project = project
|
16
|
-
self.template_path = template_path
|
17
|
-
end
|
18
|
-
|
19
|
-
def add
|
20
|
-
[tables_directory, routines_directory].each(&:mkpath)
|
21
|
-
FileUtils.cp(template_path, manifold_path)
|
22
|
-
end
|
23
|
-
|
24
|
-
def tables_directory
|
25
|
-
project.workspaces_directory.join(name, "tables")
|
26
|
-
end
|
27
|
-
|
28
|
-
def routines_directory
|
29
|
-
project.workspaces_directory.join(name, "routines")
|
30
|
-
end
|
31
|
-
|
32
|
-
def manifold_file
|
33
|
-
return nil unless manifold_exists?
|
34
|
-
|
35
|
-
File.new(manifold_path)
|
36
|
-
end
|
37
|
-
|
38
|
-
def manifold_exists?
|
39
|
-
manifold_path.file?
|
40
|
-
end
|
41
|
-
|
42
|
-
def manifold_path
|
43
|
-
project.workspaces_directory.join(name, "manifold.yml")
|
44
|
-
end
|
45
|
-
|
46
|
-
private
|
47
|
-
|
48
|
-
attr_writer :name, :project, :template_path
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
data/lib/manifold/services/big_query_service.rb
REMOVED
@@ -1,61 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Manifold
|
4
|
-
module Services
|
5
|
-
# Handles the generation of BigQuery schemas based on project configurations
|
6
|
-
class BigQueryService
|
7
|
-
def initialize(logger)
|
8
|
-
@logger = logger
|
9
|
-
@vector_service = Manifold::Services::VectorService.new(logger)
|
10
|
-
end
|
11
|
-
|
12
|
-
def generate_dimensions_schema(project_name)
|
13
|
-
config_path = Pathname.pwd.join("projects", project_name, "manifold.yml")
|
14
|
-
return unless validate_config_exists(config_path, project_name)
|
15
|
-
|
16
|
-
config = YAML.safe_load_file(config_path)
|
17
|
-
|
18
|
-
fields = config["vectors"].reduce([]) do |list, vector|
|
19
|
-
@logger.info("Loading vector schema for '#{vector}'.")
|
20
|
-
[*@vector_service.load_vector_schema(vector), *list]
|
21
|
-
end
|
22
|
-
|
23
|
-
create_dimensions_file(project_name, fields)
|
24
|
-
end
|
25
|
-
|
26
|
-
private
|
27
|
-
|
28
|
-
def validate_config_exists(config_path, project_name)
|
29
|
-
unless config_path.file?
|
30
|
-
@logger.error("Config file missing for project '#{project_name}'.")
|
31
|
-
return false
|
32
|
-
end
|
33
|
-
true
|
34
|
-
end
|
35
|
-
|
36
|
-
def create_dimensions_file(project_name, fields)
|
37
|
-
tables_directory(project_name).mkpath
|
38
|
-
dimensions = dimensions_schema(fields)
|
39
|
-
|
40
|
-
dimensions_path(project_name).write(dimensions)
|
41
|
-
@logger.info("Generated BigQuery dimensions table schema for '#{project_name}'.")
|
42
|
-
end
|
43
|
-
|
44
|
-
def dimensions_schema(fields)
|
45
|
-
JSON.pretty_generate([
|
46
|
-
{ "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
|
47
|
-
{ "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
|
48
|
-
"fields" => fields }
|
49
|
-
]).concat("\n")
|
50
|
-
end
|
51
|
-
|
52
|
-
def tables_directory(project_name)
|
53
|
-
Pathname.pwd.join("projects", project_name, "bq", "tables")
|
54
|
-
end
|
55
|
-
|
56
|
-
def dimensions_path(project_name)
|
57
|
-
tables_directory(project_name).join("dimensions.json")
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|