manifold-cli 0.0.7 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f50907061a12bacfb9d34d83f56fcf6bc98317c6516a216340450fef78cebe3f
4
- data.tar.gz: b1722162e241d3b7275bd5675c457ce53c264d425885cc1bf35b9ae4bc62db99
3
+ metadata.gz: 1eb276f49eed54c682bf10a9edc53c567a4e61c1dc2c7ee1b27e98596c91d81f
4
+ data.tar.gz: 3e9374eeb7c36b5a5cd99b17a1d168b363ee161c23ffcae3ec54a170401ae34f
5
5
  SHA512:
6
- metadata.gz: aa63a9e3a52d441d6f742efb8bab2037d67f2d12b11f8f22b835949ae3be26c3e7a01a2b15b288fcc9c9222fccd2ccb4ade3f091f01f52c678caded9fcf6733a
7
- data.tar.gz: df8807050b34f49d84741afcfad06a9c4d20406f3e67e8ad9a531fc523d47d120f630693660624ff1a2a6ba3c19d485bb7bc6340d7e70bd2aebe96f425821668
6
+ metadata.gz: d1760a9ee8d1f2a8c78521761168696e50d87d6ca1194feb68f6a4c8a552d112624ba9c5512226f042bacedee1ae490a55f2e8ec33179e2cc588e26eb8f8e302
7
+ data.tar.gz: 57ff9863d1ce37d21ac58cdc8b634449754106a2472a3d1bfbebd6332d302b1ec39377ffb5dad882091df5b500bbb654c0ea385e2a60190486f903701a4d8035
data/.rubocop.yml CHANGED
@@ -3,7 +3,8 @@ inherit_from: .rubocop_todo.yml
3
3
  require: rubocop-rspec
4
4
 
5
5
  AllCops:
6
- TargetRubyVersion: 3.0
6
+ NewCops: enable
7
+ TargetRubyVersion: 3.2
7
8
 
8
9
  Style/StringLiterals:
9
10
  EnforcedStyle: double_quotes
data/README.md CHANGED
@@ -61,7 +61,29 @@ manifold add <data_project_name>
61
61
  After you fill out the manifold.yml file, this command generates the necessary BigQuery schema files based on the specified dimensions and metrics.
62
62
 
63
63
  ```bash
64
- manifold generate <data_project_name> bq
64
+ manifold generate
65
+ ```
66
+
67
+ 4. **Generate Terraform Configuration (Optional)**
68
+
69
+ Manifold can optionally generate Terraform configurations for managing your BigQuery resources. To generate both BigQuery schemas and Terraform configurations, use the `--tf` flag:
70
+
71
+ ```bash
72
+ manifold generate --tf
73
+ ```
74
+
75
+ This will create:
76
+
77
+ - A root `main.tf.json` file that sets up the Google Cloud provider and workspace modules
78
+ - Individual workspace configurations in `workspaces/<workspace_name>/main.tf.json`
79
+ - Dataset and table definitions that reference your generated BigQuery schemas
80
+
81
+ The generated Terraform configurations use the Google Cloud provider and expect a `project_id` variable to be set. You can apply these configurations using standard Terraform commands:
82
+
83
+ ```bash
84
+ terraform init
85
+ terraform plan -var="project_id=your-project-id"
86
+ terraform apply -var="project_id=your-project-id"
65
87
  ```
66
88
 
67
89
  ## Manifold Configuration
data/bin/manifold CHANGED
@@ -4,6 +4,6 @@
4
4
  lib_path = File.expand_path("../lib", Dir.pwd)
5
5
  $LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
6
6
 
7
- require "manifold/cli"
7
+ require "manifold"
8
8
 
9
9
  Manifold::CLI.start(ARGV)
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Manifold
  module API
    # An umbrella Manifold project: a root directory holding a `workspaces/`
    # and a `vectors/` subdirectory.
    class Project
      attr_reader :name, :logger, :directory

      # @param name [String] project name
      # @param logger [Logger] passed on to every workspace built by #workspaces
      # @param directory [Pathname, String] project root; defaults to `<pwd>/<name>`
      def initialize(name, logger: Logger.new($stdout), directory: Pathname.pwd.join(name))
        @name = name
        @logger = logger
        @directory = Pathname(directory)
      end

      # Builds a project and creates its workspaces and vectors directories.
      #
      # @param name [String] project name
      # @param directory [Pathname, String] project root; defaults to `<pwd>/<name>`
      # @return [Project] the newly built project
      def self.create(name, directory: Pathname.pwd.join(name))
        new(name, directory:).tap do |project|
          [project.workspaces_directory, project.vectors_directory].each(&:mkpath)
        end
      end

      # One workspace per subdirectory of `workspaces/`, memoized after the
      # first call. Empty when the workspaces directory does not exist.
      #
      # @return [Array<Workspace>]
      def workspaces
        @workspaces ||= workspace_directories.map { |dir| Workspace.from_directory(dir, logger:) }
      end

      # Runs generation for every workspace and, when requested, writes the
      # project-level `main.tf.json`.
      #
      # @param with_terraform [Boolean] also generate Terraform configuration
      def generate(with_terraform: false)
        workspaces.each { |workspace| workspace.generate(with_terraform:) }
        generate_terraform_entrypoint if with_terraform
      end

      # @return [Pathname] `<directory>/workspaces`
      def workspaces_directory
        directory.join("workspaces")
      end

      # @return [Pathname] `<directory>/vectors`
      def vectors_directory
        directory.join("vectors")
      end

      private

      # Subdirectories of the workspaces directory, sorted so that generated
      # output does not depend on filesystem listing order.
      def workspace_directories
        # Pathname#children raises Errno::ENOENT on a missing directory;
        # treat "no workspaces directory" as "no workspaces".
        return [] unless workspaces_directory.directory?

        workspaces_directory.children.select(&:directory?).sort
      end

      # Writes the root Terraform configuration to `<directory>/main.tf.json`.
      def generate_terraform_entrypoint
        config = Terraform::ProjectConfiguration.new(workspaces)
        config.write(directory.join("main.tf.json"))
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
# FileUtils is used by Vector#add; require it explicitly instead of relying
# on another library having loaded it first.
require "fileutils"

module Manifold
  module API
    # Describes the entities for whom metrics are calculated.
    class Vector
      attr_reader :name, :template_path

      # Template copied by #add when no other template path is given.
      DEFAULT_TEMPLATE_PATH = File.expand_path(
        "../templates/vector_template.yml", __dir__
      ).freeze

      # @param name [String] vector name; downcased to form the config file name
      # @param template_path [String, Pathname] file copied by #add
      def initialize(name, template_path: DEFAULT_TEMPLATE_PATH)
        @name = name
        @template_path = Pathname(template_path)
      end

      # Copies the template to `vectors/<downcased name>.yml` under the
      # current working directory, creating `vectors/` first when needed.
      def add
        directory.mkpath
        FileUtils.cp(template_path, config_path)
      end

      private

      # @return [Pathname] `<pwd>/vectors`
      def directory
        Pathname.pwd.join("vectors")
      end

      # @return [Pathname] destination of the vector's configuration file
      def config_path
        directory.join("#{name.downcase}.yml")
      end
    end
  end
end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
# FileUtils is used by Workspace#add; require it explicitly instead of
# relying on another library having loaded it first.
require "fileutils"

module Manifold
  module API
    # Encapsulates a single manifold: the workspace stored under
    # `workspaces/<name>` in the current working directory.
    class Workspace
      attr_reader :name, :template_path, :logger

      # Template copied to `manifold.yml` by #add when no other path is given.
      DEFAULT_TEMPLATE_PATH = File.expand_path(
        "../templates/workspace_template.yml", __dir__
      ).freeze

      # @param name [String] workspace name, also used as its directory name
      # @param template_path [String, Pathname] file copied by #add
      # @param logger [Logger] receives progress messages; also given to the vector service
      def initialize(name, template_path: DEFAULT_TEMPLATE_PATH, logger: Logger.new($stdout))
        @name = name
        @template_path = template_path
        @logger = logger
        @vector_service = Services::VectorService.new(logger)
      end

      # Builds a workspace named after the last component of +directory+.
      #
      # @param directory [Pathname] a workspace directory
      # @param logger [Logger]
      # @return [Workspace]
      def self.from_directory(directory, logger: Logger.new($stdout))
        new(directory.basename.to_s, logger:)
      end

      # Creates the tables and routines directories and copies the template
      # to `manifold.yml`.
      def add
        [tables_directory, routines_directory].each(&:mkpath)
        FileUtils.cp(template_path, manifold_path)
      end

      # Writes `tables/dimensions.json` and, when requested, `main.tf.json`.
      # Does nothing when `manifold.yml` is missing or lists no vectors.
      #
      # @param with_terraform [Boolean] also write the Terraform configuration
      def generate(with_terraform: false)
        return unless manifold_exists? && any_vectors?

        generate_dimensions
        generate_terraform if with_terraform
        logger.info("Generated BigQuery dimensions table schema for workspace '#{name}'.")
      end

      # @return [Pathname] `<workspace>/tables`
      def tables_directory
        directory.join("tables")
      end

      # @return [Pathname] `<workspace>/routines`
      def routines_directory
        directory.join("routines")
      end

      # @return [File, nil] an open handle on `manifold.yml`, or nil when the
      #   file does not exist; closing the handle is left to the caller
      def manifold_file
        return nil unless manifold_exists?

        File.new(manifold_path)
      end

      # @return [Boolean] whether `manifold.yml` exists as a regular file
      def manifold_exists?
        manifold_path.file?
      end

      # @return [Pathname] `<workspace>/manifold.yml`
      def manifold_path
        directory.join("manifold.yml")
      end

      # @return [Pathname] `<workspace>/main.tf.json`
      def terraform_main_path
        directory.join("main.tf.json")
      end

      private

      # @return [Pathname] `<pwd>/workspaces/<name>`
      def directory
        Pathname.pwd.join("workspaces", name)
      end

      # Parsed contents of `manifold.yml`, memoized once a truthy value loads.
      def manifold_yaml
        @manifold_yaml ||= YAML.safe_load_file(manifold_path)
      end

      # Writes the dimensions schema as pretty-printed JSON plus a newline.
      def generate_dimensions
        # The tables directory may be missing when the workspace was not
        # created through #add; make sure the write target exists.
        tables_directory.mkpath
        dimensions_path.write("#{dimensions_schema_json}\n")
      end

      # Schema of the dimensions table: a required string `id` and a required
      # `dimensions` record holding one field per vector.
      def dimensions_schema
        [
          { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
          { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
            "fields" => dimensions_fields }
        ]
      end

      # Loads one schema per vector; nil results from the service are dropped.
      def dimensions_fields
        vectors.filter_map do |vector|
          logger.info("Loading vector schema for '#{vector}'.")
          @vector_service.load_vector_schema(vector)
        end
      end

      def dimensions_schema_json
        JSON.pretty_generate(dimensions_schema)
      end

      # @return [Pathname] `<workspace>/tables/dimensions.json`
      def dimensions_path
        tables_directory.join("dimensions.json")
      end

      def any_vectors?
        !vectors.empty?
      end

      # Vector names listed under the `vectors` key of `manifold.yml`.
      # Returns an empty list when the file is empty, is not a mapping, or
      # has no list under that key, so callers never see nil.
      #
      # @return [Array]
      def vectors
        config = manifold_yaml
        return [] unless config.is_a?(Hash)

        listed = config["vectors"]
        listed.is_a?(Array) ? listed : []
      end

      # Builds the workspace Terraform configuration from every vector's
      # config and writes it to #terraform_main_path.
      def generate_terraform
        config = Terraform::WorkspaceConfiguration.new(name)
        vectors.each do |vector|
          vector_config = @vector_service.load_vector_config(vector)
          config.add_vector(vector_config)
        end
        config.write(terraform_main_path)
      end
    end
  end
end
data/lib/manifold/cli.rb CHANGED
@@ -11,14 +11,12 @@ module Manifold
11
11
 
12
12
  self.logger = logger
13
13
  logger.level = Logger::INFO
14
-
15
- self.bq_service = Services::BigQueryService.new(logger)
16
14
  end
17
15
 
18
16
  desc "init NAME", "Generate a new umbrella project for data management"
19
17
  def init(name)
20
18
  Manifold::API::Project.create(name)
21
- logger.info "Created umbrella project '#{name}' with projects and vectors directories."
19
+ logger.info "Created umbrella project '#{name}' with workspaces and vectors directories."
22
20
  end
23
21
 
24
22
  desc "vectors SUBCOMMAND ...ARGS", "Manage vectors"
@@ -33,28 +31,28 @@ module Manifold
33
31
  end
34
32
 
35
33
  desc "add VECTOR_NAME", "Add a new vector configuration"
36
- def add(name, project: API::Project.new(File.basename(Dir.getwd)))
37
- vector = API::Vector.new(name, project: project)
34
+ def add(name)
35
+ vector = API::Vector.new(name)
38
36
  vector.add
39
37
  logger.info "Created vector configuration for '#{name}'."
40
38
  end
41
39
  }
42
40
 
43
41
  desc "add WORKSPACE_NAME", "Add a new workspace to a project"
44
- def add(name, project: API::Project.new(File.basename(Dir.getwd)))
45
- workspace = API::Workspace.new(name, project: project)
42
+ def add(name)
43
+ workspace = API::Workspace.new(name)
46
44
  workspace.add
47
45
  logger.info "Added workspace '#{name}' with tables and routines directories."
48
46
  end
49
47
 
50
- desc "generate PROJECT_NAME SERVICE", "Generate services for a project"
51
- def generate(project_name, service)
52
- case service
53
- when "bq"
54
- bq_service.generate_dimensions_schema(project_name)
55
- else
56
- logger.error("Unsupported service: #{service}")
57
- end
48
+ desc "generate", "Generate BigQuery schema for all workspaces in the project"
49
+ method_option :tf, type: :boolean, desc: "Generate Terraform configurations"
50
+ def generate
51
+ path = Pathname.pwd
52
+ name = path.basename.to_s
53
+ project = API::Project.new(name, directory: path, logger:)
54
+ project.generate(with_terraform: options[:tf])
55
+ logger.info "Generated BigQuery schema for all workspaces in the project."
58
56
  end
59
57
  end
60
58
  end
@@ -10,14 +10,23 @@ module Manifold
10
10
 
11
11
  def load_vector_schema(vector_name)
12
12
  path = config_path(vector_name)
13
- unless path.file?
14
- @logger.error("Vector configuration not found: #{path}")
15
- return nil
16
- end
17
-
18
13
  config = YAML.safe_load_file(path)
19
14
  fields = transform_attributes_to_schema(config["attributes"])
20
15
  { "name" => vector_name.downcase, "type" => "RECORD", "fields" => fields }
16
+ rescue Errno::ENOENT, Errno::EISDIR
17
+ raise "Vector configuration not found: #{path}"
18
+ rescue Psych::Exception => e
19
+ raise "Invalid YAML in vector configuration #{path}: #{e.message}"
20
+ end
21
+
22
# Loads a vector's YAML configuration and returns it with a "name" entry set
# to the downcased vector name (replacing any "name" key from the file).
#
# @param vector_name [String]
# @return [Hash] the parsed configuration plus "name"
# @raise [RuntimeError] when the file is missing, is a directory, contains
#   invalid YAML, or does not contain a YAML mapping
def load_vector_config(vector_name)
  path = config_path(vector_name)
  config = YAML.safe_load_file(path)
  # An empty file parses to a non-Hash value; report it clearly instead of
  # failing with NoMethodError on the merge below.
  raise "Invalid vector configuration #{path}: expected a YAML mapping" unless config.is_a?(Hash)

  config.merge("name" => vector_name.downcase)
rescue Errno::ENOENT, Errno::EISDIR
  raise "Vector configuration not found: #{path}"
rescue Psych::Exception => e
  raise "Invalid YAML in vector configuration #{path}: #{e.message}"
end
22
31
 
23
32
  private
@@ -5,3 +5,7 @@ attributes:
5
5
  # id: STRING
6
6
  # created_at: TIMESTAMP
7
7
  # status: STRING
8
+
9
+ # Optionally, reference a view specifying how to select vector dimensions
10
+ # merge:
11
+ # source: lib/views/select_my_vector.sql
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
require "json"
require "pathname"

module Manifold
  module Terraform
    # Provides a base class for Terraform configuration files.
    #
    # Subclasses supply #as_json; #write serialises that value to disk.
    class Configuration
      # @return [Hash] the configuration as a JSON-serialisable structure
      # @raise [NotImplementedError] always, unless overridden by a subclass
      def as_json
        raise NotImplementedError, "#{self.class} must implement #as_json"
      end

      # Writes #as_json as pretty-printed JSON followed by a newline.
      #
      # @param path [Pathname, String] destination file; coerced with
      #   Pathname() so plain string paths are accepted as well
      def write(path)
        Pathname(path).write("#{JSON.pretty_generate(as_json)}\n")
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
module Manifold
  module Terraform
    # Represents a Terraform configuration for a Manifold project.
    #
    # Serialises to four top-level sections: the Google provider requirement,
    # the provider block, the `project_id` variable, and one module entry per
    # workspace.
    class ProjectConfiguration < Configuration
      DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION = "6.18.1"

      attr_reader :workspaces, :provider_version

      # @param workspaces [Enumerable] objects responding to #name
      # @param provider_version [String] version written for the Google provider
      def initialize(workspaces, provider_version: DEFAULT_TERRAFORM_GOOGLE_PROVIDER_VERSION)
        super()
        @workspaces = workspaces
        @provider_version = provider_version
      end

      # @return [Hash] the root configuration, keys in output order
      def as_json
        {
          "terraform" => { "required_providers" => { "google" => google_requirement } },
          "provider" => { "google" => { "project" => "${var.project_id}" } },
          "variable" => { "project_id" => project_id_variable },
          "module" => workspace_modules
        }
      end

      private

      # Source and version constraint for the Google provider.
      def google_requirement
        {
          "source" => "hashicorp/google",
          "version" => provider_version
        }
      end

      # Declaration of the `project_id` input variable.
      def project_id_variable
        {
          "description" => "The GCP project ID where resources will be created",
          "type" => "string"
        }
      end

      # One module per workspace, keyed by workspace name, each pointing at
      # the workspace's directory and forwarding the project id.
      def workspace_modules
        workspaces.to_h do |workspace|
          settings = {
            "source" => "./workspaces/#{workspace.name}",
            "project_id" => "${var.project_id}"
          }
          [workspace.name, settings]
        end
      end
    end
  end
end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
module Manifold
  module Terraform
    # Represents a Terraform configuration for a Manifold workspace.
    #
    # Serialises to a `project_id` variable plus a BigQuery dataset, a
    # dimensions table, and one merge routine for each added vector whose
    # config names a merge source.
    class WorkspaceConfiguration < Configuration
      attr_reader :name

      # @param name [String] workspace name; used as the dataset id
      def initialize(name)
        super()
        @name = name
        @vectors = []
      end

      # Registers a vector configuration hash for routine generation.
      #
      # @param vector_config [Hash] reads the "name" and "merge" keys
      def add_vector(vector_config)
        @vectors << vector_config
      end

      # @return [Hash] the workspace configuration; the routine section is
      #   left out when no vector defines a merge source
      def as_json
        resources = {
          "google_bigquery_dataset" => dataset_config,
          "google_bigquery_table" => table_config,
          "google_bigquery_routine" => routine_config
        }

        { "variable" => variables_block, "resource" => resources.compact }
      end

      private

      # Declaration of the `project_id` input variable.
      def variables_block
        declaration = {
          "description" => "The GCP project ID where resources will be created",
          "type" => "string"
        }
        { "project_id" => declaration }
      end

      # The dataset resource, keyed by the workspace name.
      def dataset_config
        settings = {
          "dataset_id" => name,
          "project" => "${var.project_id}",
          "location" => "US"
        }
        { name => settings }
      end

      # The dimensions table resource; its schema is read from the JSON file
      # in the workspace's tables directory.
      def table_config
        settings = {
          "dataset_id" => name,
          "project" => "${var.project_id}",
          "table_id" => "Dimensions",
          "schema" => "${file(\"${path.module}/tables/dimensions.json\")}",
          "depends_on" => ["google_bigquery_dataset.#{name}"]
        }
        { "dimensions" => settings }
      end

      # Routine resources keyed by routine name, or nil when there are none.
      def routine_config
        routines = @vectors.filter_map { |vector| build_routine(vector) }.to_h
        routines unless routines.empty?
      end

      # @return [Array(String, Hash), nil] name/attributes pair, or nil when
      #   the vector has no merge source
      def build_routine(vector)
        merge_settings = vector["merge"]
        return unless merge_settings&.fetch("source", nil)

        routine_name = "merge_#{vector["name"].downcase}_dimensions"
        [routine_name, routine_attributes(routine_name, vector)]
      end

      # Attributes of the SQL procedure resource for one vector.
      def routine_attributes(routine_name, vector)
        {
          "dataset_id" => name,
          "project" => "${var.project_id}",
          "routine_id" => routine_name,
          "routine_type" => "PROCEDURE",
          "language" => "SQL",
          "definition_body" => merge_routine_definition(vector),
          "depends_on" => ["google_bigquery_dataset.#{name}"]
        }
      end

      # MERGE statement that upserts the vector's dimensions into the
      # workspace's Dimensions table from the vector's source SQL.
      def merge_routine_definition(vector)
        source_sql = read_source_sql(vector["merge"]["source"])
        <<~SQL
          MERGE #{name}.Dimensions AS TARGET
          USING (
          #{source_sql}
          ) AS source
          ON source.id = target.id
          WHEN MATCHED THEN UPDATE SET target.#{vector["name"].downcase} = source.dimensions
          WHEN NOT MATCHED THEN INSERT ROW;
        SQL
      end

      # Reads the SQL file at +source_path+, resolved against the current
      # working directory.
      def read_source_sql(source_path)
        sql_file = Pathname.pwd.join(source_path)
        File.read(sql_file)
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Manifold
4
- VERSION = "0.0.7"
4
+ VERSION = "0.0.9"
5
5
  end
data/lib/manifold.rb CHANGED
@@ -1,10 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "json"
4
+ require "logger"
3
5
  require "pathname"
4
6
  require "thor"
5
7
  require "yaml"
6
8
 
7
- Dir[File.join(__dir__, "manifold", "**", "*.rb")].sort.each do |file|
9
+ Dir[File.join(__dir__, "manifold", "**", "*.rb")].each do |file|
8
10
  require file
9
11
  end
10
12
 
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: manifold-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - claytongentry
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-11-11 00:00:00.000000000 Z
10
+ date: 2025-02-04 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: thor
@@ -24,7 +23,6 @@ dependencies:
24
23
  - - ">="
25
24
  - !ruby/object:Gem::Version
26
25
  version: '0'
27
- description:
28
26
  email:
29
27
  - clayton@bustle.com
30
28
  executables:
@@ -44,14 +42,16 @@ files:
44
42
  - docs/CONTRIBUTING.md
45
43
  - lib/manifold.rb
46
44
  - lib/manifold/api.rb
45
+ - lib/manifold/api/project.rb
46
+ - lib/manifold/api/vector.rb
47
+ - lib/manifold/api/workspace.rb
47
48
  - lib/manifold/cli.rb
48
- - lib/manifold/project/project.rb
49
- - lib/manifold/project/vector.rb
50
- - lib/manifold/project/workspace.rb
51
- - lib/manifold/services/big_query_service.rb
52
49
  - lib/manifold/services/vector_service.rb
53
50
  - lib/manifold/templates/vector_template.yml
54
51
  - lib/manifold/templates/workspace_template.yml
52
+ - lib/manifold/terraform/configuration.rb
53
+ - lib/manifold/terraform/project_configuration.rb
54
+ - lib/manifold/terraform/workspace_configuration.rb
55
55
  - lib/manifold/version.rb
56
56
  - sig/manifold.rbs
57
57
  homepage: https://github.com/bustle/manifold
@@ -61,7 +61,7 @@ metadata:
61
61
  homepage_uri: https://github.com/bustle/manifold
62
62
  source_code_uri: https://github.com/bustle/manifold
63
63
  changelog_uri: https://github.com/bustle/manifold/CHANGELOG.md
64
- post_install_message:
64
+ rubygems_mfa_required: 'true'
65
65
  rdoc_options: []
66
66
  require_paths:
67
67
  - lib
@@ -69,15 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
69
69
  requirements:
70
70
  - - ">="
71
71
  - !ruby/object:Gem::Version
72
- version: 3.0.0
72
+ version: 3.2.0
73
73
  required_rubygems_version: !ruby/object:Gem::Requirement
74
74
  requirements:
75
75
  - - ">="
76
76
  - !ruby/object:Gem::Version
77
77
  version: '0'
78
78
  requirements: []
79
- rubygems_version: 3.5.22
80
- signing_key:
79
+ rubygems_version: 3.6.2
81
80
  specification_version: 4
82
81
  summary: A CLI for managing data infrastructures in BigQuery
83
82
  test_files: []
@@ -1,33 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Manifold
4
- module API
5
- # Projects API
6
- class Project
7
- attr_reader :name, :directory
8
-
9
- def initialize(name, directory: Pathname.pwd.join(name))
10
- self.name = name
11
- self.directory = Pathname(directory)
12
- end
13
-
14
- def self.create(name, directory: Pathname.pwd.join(name))
15
- new(name, directory: directory).tap do |project|
16
- [project.workspaces_directory, project.vectors_directory].each(&:mkpath)
17
- end
18
- end
19
-
20
- def workspaces_directory
21
- directory.join("workspaces")
22
- end
23
-
24
- def vectors_directory
25
- directory.join("vectors")
26
- end
27
-
28
- private
29
-
30
- attr_writer :name, :directory
31
- end
32
- end
33
- end
@@ -1,37 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Manifold
4
- module API
5
- # Describes the entities for whom metrics are calculated.
6
- class Vector
7
- attr_reader :name, :project, :template_path
8
-
9
- DEFAULT_TEMPLATE_PATH = Pathname.pwd.join(
10
- "lib", "manifold", "templates", "vector_template.yml"
11
- ).freeze
12
-
13
- def initialize(name, project:, template_path: DEFAULT_TEMPLATE_PATH)
14
- self.name = name
15
- self.project = project
16
- self.template_path = Pathname(template_path)
17
- end
18
-
19
- def add
20
- directory.mkpath
21
- FileUtils.cp(template_path, config_path)
22
- end
23
-
24
- private
25
-
26
- attr_writer :name, :project, :template_path
27
-
28
- def directory
29
- project.directory.join("vectors")
30
- end
31
-
32
- def config_path
33
- directory.join("#{name.downcase}.yml")
34
- end
35
- end
36
- end
37
- end
@@ -1,51 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Manifold
4
- module API
5
- # Encapsulates a single manifold.
6
- class Workspace
7
- attr_reader :name, :project, :template_path
8
-
9
- DEFAULT_TEMPLATE_PATH = Pathname.pwd.join(
10
- "lib", "manifold", "templates", "workspace_template.yml"
11
- )
12
-
13
- def initialize(name, project:, template_path: DEFAULT_TEMPLATE_PATH)
14
- self.name = name
15
- self.project = project
16
- self.template_path = template_path
17
- end
18
-
19
- def add
20
- [tables_directory, routines_directory].each(&:mkpath)
21
- FileUtils.cp(template_path, manifold_path)
22
- end
23
-
24
- def tables_directory
25
- project.workspaces_directory.join(name, "tables")
26
- end
27
-
28
- def routines_directory
29
- project.workspaces_directory.join(name, "routines")
30
- end
31
-
32
- def manifold_file
33
- return nil unless manifold_exists?
34
-
35
- File.new(manifold_path)
36
- end
37
-
38
- def manifold_exists?
39
- manifold_path.file?
40
- end
41
-
42
- def manifold_path
43
- project.workspaces_directory.join(name, "manifold.yml")
44
- end
45
-
46
- private
47
-
48
- attr_writer :name, :project, :template_path
49
- end
50
- end
51
- end
@@ -1,61 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Manifold
4
- module Services
5
- # Handles the generation of BigQuery schemas based on project configurations
6
- class BigQueryService
7
- def initialize(logger)
8
- @logger = logger
9
- @vector_service = Manifold::Services::VectorService.new(logger)
10
- end
11
-
12
- def generate_dimensions_schema(project_name)
13
- config_path = Pathname.pwd.join("projects", project_name, "manifold.yml")
14
- return unless validate_config_exists(config_path, project_name)
15
-
16
- config = YAML.safe_load_file(config_path)
17
-
18
- fields = config["vectors"].reduce([]) do |list, vector|
19
- @logger.info("Loading vector schema for '#{vector}'.")
20
- [*@vector_service.load_vector_schema(vector), *list]
21
- end
22
-
23
- create_dimensions_file(project_name, fields)
24
- end
25
-
26
- private
27
-
28
- def validate_config_exists(config_path, project_name)
29
- unless config_path.file?
30
- @logger.error("Config file missing for project '#{project_name}'.")
31
- return false
32
- end
33
- true
34
- end
35
-
36
- def create_dimensions_file(project_name, fields)
37
- tables_directory(project_name).mkpath
38
- dimensions = dimensions_schema(fields)
39
-
40
- dimensions_path(project_name).write(dimensions)
41
- @logger.info("Generated BigQuery dimensions table schema for '#{project_name}'.")
42
- end
43
-
44
- def dimensions_schema(fields)
45
- JSON.pretty_generate([
46
- { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
47
- { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
48
- "fields" => fields }
49
- ]).concat("\n")
50
- end
51
-
52
- def tables_directory(project_name)
53
- Pathname.pwd.join("projects", project_name, "bq", "tables")
54
- end
55
-
56
- def dimensions_path(project_name)
57
- tables_directory(project_name).join("dimensions.json")
58
- end
59
- end
60
- end
61
- end