manifold-cli 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +12 -0
- data/.rubocop_todo.yml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/LICENSE.txt +21 -0
- data/README.md +126 -0
- data/Rakefile +8 -0
- data/bin/manifold +9 -0
- data/docs/CONTRIBUTING.md +33 -0
- data/lib/manifold/api.rb +7 -0
- data/lib/manifold/cli.rb +60 -0
- data/lib/manifold/project/project.rb +33 -0
- data/lib/manifold/project/vector.rb +37 -0
- data/lib/manifold/project/workspace.rb +51 -0
- data/lib/manifold/services/big_query_service.rb +61 -0
- data/lib/manifold/services/vector_service.rb +40 -0
- data/lib/manifold/templates/vector_template.yml +7 -0
- data/lib/manifold/templates/workspace_template.yml +25 -0
- data/lib/manifold/version.rb +5 -0
- data/lib/manifold.rb +13 -0
- data/sig/manifold.rbs +4 -0
- metadata +83 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f50907061a12bacfb9d34d83f56fcf6bc98317c6516a216340450fef78cebe3f
|
4
|
+
data.tar.gz: b1722162e241d3b7275bd5675c457ce53c264d425885cc1bf35b9ae4bc62db99
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: aa63a9e3a52d441d6f742efb8bab2037d67f2d12b11f8f22b835949ae3be26c3e7a01a2b15b288fcc9c9222fccd2ccb4ade3f091f01f52c678caded9fcf6733a
|
7
|
+
data.tar.gz: df8807050b34f49d84741afcfad06a9c4d20406f3e67e8ad9a531fc523d47d120f630693660624ff1a2a6ba3c19d485bb7bc6340d7e70bd2aebe96f425821668
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2024-11-09 13:21:19 UTC using RuboCop version 1.68.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
data/CHANGELOG.md
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
2
|
+
|
3
|
+
## Our Pledge
|
4
|
+
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
6
|
+
|
7
|
+
We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
|
8
|
+
|
9
|
+
## Our Standards
|
10
|
+
|
11
|
+
Examples of behavior that contributes to a positive environment for our community include:
|
12
|
+
|
13
|
+
- Demonstrating empathy and kindness toward other people
|
14
|
+
- Being respectful of differing opinions, viewpoints, and experiences
|
15
|
+
- Giving and gracefully accepting constructive feedback
|
16
|
+
- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
|
17
|
+
- Focusing on what is best not just for us as individuals, but for the overall community
|
18
|
+
|
19
|
+
Examples of unacceptable behavior include:
|
20
|
+
|
21
|
+
- The use of sexualized language or imagery, and sexual attention or
|
22
|
+
advances of any kind
|
23
|
+
- Trolling, insulting or derogatory comments, and personal or political attacks
|
24
|
+
- Public or private harassment
|
25
|
+
- Publishing others' private information, such as a physical or email
|
26
|
+
address, without their explicit permission
|
27
|
+
- Other conduct which could reasonably be considered inappropriate in a
|
28
|
+
professional setting
|
29
|
+
|
30
|
+
## Enforcement Responsibilities
|
31
|
+
|
32
|
+
Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.
|
33
|
+
|
34
|
+
Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.
|
35
|
+
|
36
|
+
## Scope
|
37
|
+
|
38
|
+
This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
|
39
|
+
|
40
|
+
## Enforcement
|
41
|
+
|
42
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at clayton@bustle.com. All complaints will be reviewed and investigated promptly and fairly.
|
43
|
+
|
44
|
+
All community leaders are obligated to respect the privacy and security of the reporter of any incident.
|
45
|
+
|
46
|
+
## Enforcement Guidelines
|
47
|
+
|
48
|
+
Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:
|
49
|
+
|
50
|
+
### 1. Correction
|
51
|
+
|
52
|
+
**Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
|
53
|
+
|
54
|
+
**Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.
|
55
|
+
|
56
|
+
### 2. Warning
|
57
|
+
|
58
|
+
**Community Impact**: A violation through a single incident or series of actions.
|
59
|
+
|
60
|
+
**Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.
|
61
|
+
|
62
|
+
### 3. Temporary Ban
|
63
|
+
|
64
|
+
**Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.
|
65
|
+
|
66
|
+
**Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
|
67
|
+
|
68
|
+
### 4. Permanent Ban
|
69
|
+
|
70
|
+
**Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
|
71
|
+
|
72
|
+
**Consequence**: A permanent ban from any sort of public interaction within the community.
|
73
|
+
|
74
|
+
## Attribution
|
75
|
+
|
76
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0,
|
77
|
+
available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
|
78
|
+
|
79
|
+
Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity).
|
80
|
+
|
81
|
+
[homepage]: https://www.contributor-covenant.org
|
82
|
+
|
83
|
+
For answers to common questions about this code of conduct, see the FAQ at
|
84
|
+
https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations.
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 BDG Media, Inc
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
![manifold](https://github.com/bustle/manifold/assets/8030836/be7f24f6-27b9-4924-8e1a-ae2899c7b272)
|
2
|
+
|
3
|
+
# Manifold
|
4
|
+
|
5
|
+
![CI](https://github.com/bustle/manifold/actions/workflows/ci.yml/badge.svg)
|
6
|
+
|
7
|
+
Manifold is a framework designed to streamline the process of generating and managing data infrastructures in Google BigQuery using Terraform. By leveraging Manifold, teams can efficiently create complex views that join dimensional data with multiple metrics tables, enabling more dynamic data interactions and fewer sandcastles.
|
8
|
+
|
9
|
+
## Philosophy
|
10
|
+
|
11
|
+
At the heart of Manifold, our philosophy is to simplify the complexity inherent in managing large-scale data architectures. We aim to provide a tool that not only facilitates the easy setup of data structures but also adheres to best practices in scalability, maintainability, and performance. Manifold is built for data engineers, by data engineers, ensuring that the nuances and common challenges in data management are well-addressed.
|
12
|
+
|
13
|
+
## Features
|
14
|
+
|
15
|
+
- **Unified Data Modeling**: Manifold introduces a standardized way to model dimensions and metrics, ensuring consistency and reliability in data reporting and analysis.
|
16
|
+
- **Scalability**: Designed to handle large volumes of data, supporting a variety of data types and structures.
|
17
|
+
- **Flexibility**: Easily adapt to different kinds of metric groupings such as by device type (e.g., desktop, tablet, mobile) with identical metric structures beneath these segmentations.
|
18
|
+
|
19
|
+
## Getting Started
|
20
|
+
|
21
|
+
### Prerequisites
|
22
|
+
|
23
|
+
- Ruby
|
24
|
+
- Terraform
|
25
|
+
- Google Cloud SDK (gcloud)
|
26
|
+
|
27
|
+
### Installation
|
28
|
+
|
29
|
+
1. **Install the Manifold Gem**:
|
30
|
+
`manifold` is distributed as a Ruby gem. To install it, run:
|
31
|
+
|
32
|
+
```bash
|
33
|
+
gem install manifold-cli
|
34
|
+
```
|
35
|
+
|
36
|
+
2. **Setup Terraform**: Ensure that Terraform is installed and configured to interact with your Google Cloud Platform account.
|
37
|
+
|
38
|
+
3. **Configure Your Environment**: Set up your environment variables and credentials to access Google BigQuery and other necessary services.
|
39
|
+
|
40
|
+
## Usage
|
41
|
+
|
42
|
+
1. **Initialize a New Umbrella Project**
|
43
|
+
|
44
|
+
Set up a new umbrella project directory with the necessary structure for managing multiple data projects.
|
45
|
+
|
46
|
+
```bash
|
47
|
+
manifold init <project_name>
|
48
|
+
```
|
49
|
+
|
50
|
+
2. **Add a New Data Project**
|
51
|
+
|
52
|
+
Add a new data project under the umbrella. This setup includes creating a directory for the data project and initializing with a template `manifold.yml` file.
|
53
|
+
|
54
|
+
```bash
|
55
|
+
cd <project_name>
|
56
|
+
manifold add <data_project_name>
|
57
|
+
```
|
58
|
+
|
59
|
+
3. **Generate BigQuery Resource Definitions**
|
60
|
+
|
61
|
+
After you fill out the manifold.yml file, this command generates the necessary BigQuery schema files based on the specified dimensions and metrics.
|
62
|
+
|
63
|
+
```bash
|
64
|
+
manifold generate <data_project_name> bq
|
65
|
+
```
|
66
|
+
|
67
|
+
## Manifold Configuration
|
68
|
+
|
69
|
+
### Vectors
|
70
|
+
|
71
|
+
Vectors are the entities you can roll up data for. Each vector has a set of dimensions defined in its `vectors/<vector_name>.yml` configuration file.
|
72
|
+
|
73
|
+
```yaml
|
74
|
+
vectors:
|
75
|
+
- page
|
76
|
+
```
|
77
|
+
|
78
|
+
#### Add a vector to your project
|
79
|
+
|
80
|
+
```bash
|
81
|
+
manifold vectors add page
|
82
|
+
```
|
83
|
+
|
84
|
+
### Metrics
|
85
|
+
|
86
|
+
Metrics are fields that contain numerical data that can be aggregated. They are typically used to measure performance or other quantitative data.
|
87
|
+
|
88
|
+
#### Example
|
89
|
+
|
90
|
+
```yaml
|
91
|
+
metrics:
|
92
|
+
- name: Pageviews
|
93
|
+
id:
|
94
|
+
field: pageId
|
95
|
+
type: STRING
|
96
|
+
interval:
|
97
|
+
type: TIMESTAMP
|
98
|
+
expression: TIMESTAMP_TRUNC(timestamp, HOUR)
|
99
|
+
aggregations:
|
100
|
+
- name: pageviews
|
101
|
+
method: count
|
102
|
+
- name: sessions
|
103
|
+
method: distinct
|
104
|
+
field: sessionid
|
105
|
+
source:
|
106
|
+
type: bigquery
|
107
|
+
name: Events.Requests
|
108
|
+
breakouts:
|
109
|
+
- name: us
|
110
|
+
condition: CountryId = 2840
|
111
|
+
```
|
112
|
+
|
113
|
+
- _Name_: The name of the metric.
|
114
|
+
- _ID_: The field that uniquely identifies the metric, along with its type
|
115
|
+
- _Interval_: The time interval over which the metric is aggregated
|
116
|
+
- _Aggregations_: The methods used to aggregate the metric
|
117
|
+
- _Source_: The source table from which the metric is derived
|
118
|
+
- _Breakouts_: Custom segmentations of the metric
|
119
|
+
|
120
|
+
## Contributing
|
121
|
+
|
122
|
+
We welcome contributions from the community! Please check out our [contribution guidelines](docs/CONTRIBUTING.md) for more information on how to get involved.
|
123
|
+
|
124
|
+
## License
|
125
|
+
|
126
|
+
Distributed under the MIT License. See `LICENSE.txt` for more information.
|
data/Rakefile
ADDED
data/bin/manifold
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Contributing
|
2
|
+
|
3
|
+
We are excited that you are interested in contributing to Manifold! To maintain the quality of our project, we ask that contributors adhere to the following guidelines:
|
4
|
+
|
5
|
+
## How to Contribute
|
6
|
+
|
7
|
+
- **Familiarize yourself with the project**:
|
8
|
+
Spend some time understanding the codebase, philosophy, and design patterns used in Manifold. Feel free to ask questions if something is unclear.
|
9
|
+
|
10
|
+
- **Check the Issue Tracker**:
|
11
|
+
Before creating a new issue, please check the repository's issue tracker to avoid duplicates. If you find an issue you’d like to tackle, comment that you’re working on it.
|
12
|
+
|
13
|
+
- **Fork and Clone the Repository**:
|
14
|
+
Make a fork of the repository, clone it to your machine, and create a new branch for your contribution.
|
15
|
+
|
16
|
+
- **Code Consistency**:
|
17
|
+
Follow the coding style and conventions already used in Manifold. This includes commenting your code where necessary and adhering to the existing naming conventions and file structure.
|
18
|
+
|
19
|
+
- **Writing Tests**:
|
20
|
+
Add tests that cover the new features or fixes you are introducing. Ensure that all tests pass before submitting a pull request.
|
21
|
+
|
22
|
+
- **Documentation**:
|
23
|
+
Update the README or documentation with details of changes to the interface, additional environment variables, dependencies, or database migrations, etc.
|
24
|
+
|
25
|
+
- **Submit a Pull Request**:
|
26
|
+
Push your changes to your fork and submit a pull request to the main Manifold repository. Provide a clear description of the problem and solution, including any relevant issue numbers.
|
27
|
+
|
28
|
+
- **Review and Collaborate**:
|
29
|
+
Be open to feedback and make requested modifications to your pull request if needed. Discussion and collaboration often lead to the best solutions.
|
30
|
+
|
31
|
+
## Code of Conduct
|
32
|
+
|
33
|
+
Respect and professionalism are required at all times; harassment or offensive behavior will not be tolerated. By participating, you are expected to uphold this code.
|
data/lib/manifold/api.rb
ADDED
data/lib/manifold/cli.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
  # CLI provides command line interface functionality
  # for creating and managing umbrella projects for data management.
  class CLI < Thor
    attr_accessor :logger, :bq_service

    # @param args [Array] positional arguments, forwarded unchanged to Thor
    # @param logger [Logger] receives status messages; its level is set to INFO
    def initialize(*args, logger: Logger.new($stdout))
      super(*args)

      self.logger = logger
      logger.level = Logger::INFO

      self.bq_service = Services::BigQueryService.new(logger)
    end

    desc "init NAME", "Generate a new umbrella project for data management"
    # Creates the project NAME under the current working directory.
    def init(name)
      Manifold::API::Project.create(name)
      # Project.create makes the `workspaces` and `vectors` directories,
      # so the message names those.
      logger.info "Created umbrella project '#{name}' with workspaces and vectors directories."
    end

    desc "vectors SUBCOMMAND ...ARGS", "Manage vectors"
    subcommand "vectors", Class.new(Thor) {
      namespace :vectors

      attr_accessor :logger

      # @param args [Array] positional arguments, forwarded unchanged to Thor
      # @param logger [Logger] receives status messages
      def initialize(*args, logger: Logger.new($stdout))
        super(*args)
        self.logger = logger
      end

      desc "add VECTOR_NAME", "Add a new vector configuration"
      # The default project is the current working directory itself. The
      # directory is passed explicitly because Project's own default would
      # resolve to `<pwd>/<name>`, one level too deep when the command is run
      # from inside the project.
      def add(name, project: API::Project.new(File.basename(Dir.getwd), directory: Dir.getwd))
        vector = API::Vector.new(name, project: project)
        vector.add
        logger.info "Created vector configuration for '#{name}'."
      end
    }

    desc "add WORKSPACE_NAME", "Add a new workspace to a project"
    # The default project is the current working directory itself (see the
    # explicit `directory:`); Project's own default would point at
    # `<pwd>/<name>` instead.
    def add(name, project: API::Project.new(File.basename(Dir.getwd), directory: Dir.getwd))
      workspace = API::Workspace.new(name, project: project)
      workspace.add
      logger.info "Added workspace '#{name}' with tables and routines directories."
    end

    desc "generate PROJECT_NAME SERVICE", "Generate services for a project"
    # Only the "bq" service is handled; any other value is logged as an error.
    def generate(project_name, service)
      case service
      when "bq"
        bq_service.generate_dimensions_schema(project_name)
      else
        logger.error("Unsupported service: #{service}")
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
  module API
    # A named project rooted at a directory that holds a `workspaces`
    # and a `vectors` subdirectory.
    class Project
      attr_reader :name, :directory

      # Builds a project and creates its workspaces and vectors directories
      # on disk (including any missing parents).
      #
      # @param name [String] project name
      # @param directory [String, Pathname] project root; defaults to `<pwd>/<name>`
      # @return [Project] the newly built project
      def self.create(name, directory: Pathname.pwd.join(name))
        project = new(name, directory: directory)
        project.workspaces_directory.mkpath
        project.vectors_directory.mkpath
        project
      end

      # @param name [String] project name
      # @param directory [String, Pathname] project root; defaults to `<pwd>/<name>`
      def initialize(name, directory: Pathname.pwd.join(name))
        @name = name
        @directory = Pathname(directory)
      end

      # @return [Pathname] `<directory>/workspaces`
      def workspaces_directory
        directory + "workspaces"
      end

      # @return [Pathname] `<directory>/vectors`
      def vectors_directory
        directory + "vectors"
      end

      private

      attr_writer :name, :directory
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
  module API
    # Describes the entities for whom metrics are calculated.
    class Vector
      attr_reader :name, :project, :template_path

      # Location of the bundled vector template, resolved relative to this
      # source file rather than the caller's working directory, so it is still
      # found when the command runs outside the gem's source tree.
      # NOTE(review): assumes this file sits one directory below
      # `lib/manifold` (next to `templates/`), as the gem's file list suggests.
      DEFAULT_TEMPLATE_PATH = Pathname(
        File.expand_path("../templates/vector_template.yml", __dir__)
      ).freeze

      # @param name [String] vector name; lower-cased to form the config file name
      # @param project [#directory] project whose `vectors` directory receives the config
      # @param template_path [String, Pathname] file copied by {#add}
      def initialize(name, project:, template_path: DEFAULT_TEMPLATE_PATH)
        self.name = name
        self.project = project
        self.template_path = Pathname(template_path)
      end

      # Creates the project's vectors directory if needed and copies the
      # template to `<vectors dir>/<downcased name>.yml`.
      def add
        directory.mkpath
        FileUtils.cp(template_path, config_path)
      end

      private

      attr_writer :name, :project, :template_path

      # @return [Pathname] `<project directory>/vectors`
      def directory
        project.directory.join("vectors")
      end

      # @return [Pathname] destination of this vector's configuration file
      def config_path
        directory.join("#{name.downcase}.yml")
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
  module API
    # Encapsulates a single manifold.
    class Workspace
      attr_reader :name, :project, :template_path

      # Location of the bundled workspace template, resolved relative to this
      # source file rather than the caller's working directory, so it is still
      # found when the command runs outside the gem's source tree.
      # NOTE(review): assumes this file sits one directory below
      # `lib/manifold` (next to `templates/`), as the gem's file list suggests.
      DEFAULT_TEMPLATE_PATH = Pathname(
        File.expand_path("../templates/workspace_template.yml", __dir__)
      ).freeze

      # @param name [String] workspace name, used as its directory name
      # @param project [#workspaces_directory] project that owns the workspace
      # @param template_path [String, Pathname] file copied to `manifold.yml` by {#add}
      def initialize(name, project:, template_path: DEFAULT_TEMPLATE_PATH)
        self.name = name
        self.project = project
        # Normalised to a Pathname, matching how Vector stores its template path.
        self.template_path = Pathname(template_path)
      end

      # Creates the tables and routines directories, then copies the template
      # to {#manifold_path}.
      def add
        [tables_directory, routines_directory].each(&:mkpath)
        FileUtils.cp(template_path, manifold_path)
      end

      # @return [Pathname] `<workspaces dir>/<name>/tables`
      def tables_directory
        project.workspaces_directory.join(name, "tables")
      end

      # @return [Pathname] `<workspaces dir>/<name>/routines`
      def routines_directory
        project.workspaces_directory.join(name, "routines")
      end

      # @return [File, nil] an open handle on `manifold.yml`, or nil when the
      #   file does not exist. The handle is not closed here.
      def manifold_file
        return nil unless manifold_exists?

        File.new(manifold_path)
      end

      # @return [Boolean] whether `manifold.yml` exists as a regular file
      def manifold_exists?
        manifold_path.file?
      end

      # @return [Pathname] `<workspaces dir>/<name>/manifold.yml`
      def manifold_path
        project.workspaces_directory.join(name, "manifold.yml")
      end

      private

      attr_writer :name, :project, :template_path
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
  module Services
    # Handles the generation of BigQuery schemas based on project configurations
    class BigQueryService
      # @param logger [Logger] receives progress and error messages; also
      #   handed to the VectorService this object creates
      def initialize(logger)
        @logger = logger
        @vector_service = Manifold::Services::VectorService.new(logger)
      end

      # Reads `projects/<project_name>/manifold.yml` under the current working
      # directory and writes `projects/<project_name>/bq/tables/dimensions.json`.
      # Logs an error and returns nil when the config file is missing.
      #
      # NOTE(review): Project/Workspace place `manifold.yml` under
      # `workspaces/<name>/`, while this service looks under `projects/<name>/`
      # — confirm which layout is intended.
      #
      # @param project_name [String] directory name under `projects/`
      def generate_dimensions_schema(project_name)
        config_path = Pathname.pwd.join("projects", project_name, "manifold.yml")
        return unless validate_config_exists(config_path, project_name)

        # An empty YAML document does not parse to a Hash; treat it as "no vectors".
        config = YAML.safe_load_file(config_path)
        config = {} unless config.is_a?(Hash)

        create_dimensions_file(project_name, vector_fields(config["vectors"]))
      end

      private

      # Builds the list of RECORD fields, one per vector whose schema loads.
      #
      # `vectors` is nil when the `vectors:` key is empty or absent; Array()
      # turns that into an empty list. Each schema is inserted as a single
      # element: splatting the Hash would break it into [key, value] pairs.
      # Vectors whose schema cannot be loaded (nil) are skipped. Schemas are
      # prepended, so the last listed vector comes first.
      def vector_fields(vectors)
        Array(vectors).reduce([]) do |list, vector|
          @logger.info("Loading vector schema for '#{vector}'.")
          schema = @vector_service.load_vector_schema(vector)
          schema ? [schema, *list] : list
        end
      end

      # @return [Boolean] true when the config file exists; otherwise logs an
      #   error and returns false
      def validate_config_exists(config_path, project_name)
        unless config_path.file?
          @logger.error("Config file missing for project '#{project_name}'.")
          return false
        end
        true
      end

      # Ensures the tables directory exists and writes the dimensions schema into it.
      def create_dimensions_file(project_name, fields)
        tables_directory(project_name).mkpath
        dimensions = dimensions_schema(fields)

        dimensions_path(project_name).write(dimensions)
        @logger.info("Generated BigQuery dimensions table schema for '#{project_name}'.")
      end

      # @return [String] pretty-printed JSON (with trailing newline) describing
      #   a required `id` STRING column and a required `dimensions` RECORD
      #   holding `fields`
      def dimensions_schema(fields)
        JSON.pretty_generate([
          { "type" => "STRING", "name" => "id", "mode" => "REQUIRED" },
          { "type" => "RECORD", "name" => "dimensions", "mode" => "REQUIRED",
            "fields" => fields }
        ]).concat("\n")
      end

      # @return [Pathname] `<pwd>/projects/<project_name>/bq/tables`
      def tables_directory(project_name)
        Pathname.pwd.join("projects", project_name, "bq", "tables")
      end

      # @return [Pathname] path of the generated `dimensions.json`
      def dimensions_path(project_name)
        tables_directory(project_name).join("dimensions.json")
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Manifold
  module Services
    # Handles the loading of vector schemas from configuration files
    class VectorService
      # @param logger [Logger] receives an error message when a vector
      #   configuration file is missing
      def initialize(logger)
        @logger = logger
      end

      # Loads `vectors/<downcased vector_name>.yml` from the current working
      # directory and converts its `attributes` mapping into a RECORD schema.
      #
      # @param vector_name [String] vector name as listed in a manifold config
      # @return [Hash, nil] `{ "name", "type" => "RECORD", "fields" }`, or nil
      #   (after logging an error) when the configuration file does not exist
      def load_vector_schema(vector_name)
        path = config_path(vector_name)
        unless path.file?
          @logger.error("Vector configuration not found: #{path}")
          return nil
        end

        # An empty document does not parse to a Hash; treat it as having no attributes.
        config = YAML.safe_load_file(path)
        attributes = config.is_a?(Hash) ? config["attributes"] : nil

        fields = transform_attributes_to_schema(attributes)
        { "name" => vector_name.downcase, "type" => "RECORD", "fields" => fields }
      end

      private

      # Maps each `name => type` attribute to a NULLABLE field with an
      # upper-cased type. A nil mapping (empty or absent `attributes:` key)
      # yields an empty list instead of raising.
      def transform_attributes_to_schema(attributes)
        (attributes || {}).map do |name, type|
          {
            "name" => name,
            "type" => type.upcase,
            "mode" => "NULLABLE"
          }
        end
      end

      # @return [Pathname] `<pwd>/vectors/<downcased vector_name>.yml`
      def config_path(vector_name)
        Pathname.pwd.join("vectors", "#{vector_name.downcase}.yml")
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
vectors:
|
2
|
+
# List the vectors you want to include in this manifold
|
3
|
+
# Example:
|
4
|
+
# - User
|
5
|
+
|
6
|
+
metrics:
|
7
|
+
- name: # Add your metric name here, e.g. Pageviews
|
8
|
+
|
9
|
+
id:
|
10
|
+
field: # Identify the field that uniquely identifies each manifold vector
|
11
|
+
type: # Specify the type of that field, e.g. INTEGER
|
12
|
+
|
13
|
+
interval:
|
14
|
+
type: # Specify the interval type, e.g. TIMESTAMP or DATE
|
15
|
+
expression: # Compute the interval for the entry, e.g. TIMESTAMP_TRUNC(timestamp, HOUR)
|
16
|
+
|
17
|
+
aggregations:
|
18
|
+
# Add any aggregations this metric should present
|
19
|
+
|
20
|
+
source:
|
21
|
+
type: BIGQUERY_TABLE
|
22
|
+
project: # Add your project name here
|
23
|
+
dataset: # Add your dataset name here
|
24
|
+
table: # Add your table name
|
25
|
+
filter: # (optional) Add your filter condition here
|
data/lib/manifold.rb
ADDED
data/sig/manifold.rbs
ADDED
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: manifold-cli
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.7
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- claytongentry
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-11-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: thor
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
description:
|
28
|
+
email:
|
29
|
+
- clayton@bustle.com
|
30
|
+
executables:
|
31
|
+
- manifold
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- ".rspec"
|
36
|
+
- ".rubocop.yml"
|
37
|
+
- ".rubocop_todo.yml"
|
38
|
+
- CHANGELOG.md
|
39
|
+
- CODE_OF_CONDUCT.md
|
40
|
+
- LICENSE.txt
|
41
|
+
- README.md
|
42
|
+
- Rakefile
|
43
|
+
- bin/manifold
|
44
|
+
- docs/CONTRIBUTING.md
|
45
|
+
- lib/manifold.rb
|
46
|
+
- lib/manifold/api.rb
|
47
|
+
- lib/manifold/cli.rb
|
48
|
+
- lib/manifold/project/project.rb
|
49
|
+
- lib/manifold/project/vector.rb
|
50
|
+
- lib/manifold/project/workspace.rb
|
51
|
+
- lib/manifold/services/big_query_service.rb
|
52
|
+
- lib/manifold/services/vector_service.rb
|
53
|
+
- lib/manifold/templates/vector_template.yml
|
54
|
+
- lib/manifold/templates/workspace_template.yml
|
55
|
+
- lib/manifold/version.rb
|
56
|
+
- sig/manifold.rbs
|
57
|
+
homepage: https://github.com/bustle/manifold
|
58
|
+
licenses:
|
59
|
+
- MIT
|
60
|
+
metadata:
|
61
|
+
homepage_uri: https://github.com/bustle/manifold
|
62
|
+
source_code_uri: https://github.com/bustle/manifold
|
63
|
+
changelog_uri: https://github.com/bustle/manifold/CHANGELOG.md
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 3.0.0
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements: []
|
79
|
+
rubygems_version: 3.5.22
|
80
|
+
signing_key:
|
81
|
+
specification_version: 4
|
82
|
+
summary: A CLI for managing data infrastructures in BigQuery
|
83
|
+
test_files: []
|