dlthub-init 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dlthub_init/__init__.py +1 -0
- dlthub_init/__main__.py +10 -0
- dlthub_init/_bundled_skills/.gitkeep +0 -0
- dlthub_init/_bundled_skills/dlthub-router/SKILL.md +49 -0
- dlthub_init/_bundled_skills/improve-skills/SKILL.md +87 -0
- dlthub_init/_bundled_skills/setup-secrets/SKILL.md +120 -0
- dlthub_init/_bundled_skills/setup-secrets/cli-reference.md +69 -0
- dlthub_init/cli.py +170 -0
- dlthub_init/collisions.py +83 -0
- dlthub_init/config.py +5 -0
- dlthub_init/display.py +108 -0
- dlthub_init/errors.py +27 -0
- dlthub_init/prompts.py +33 -0
- dlthub_init/scaffold.py +96 -0
- dlthub_init/scaffolds/minimal_workspace/.dlt/.workspace +0 -0
- dlthub_init/scaffolds/minimal_workspace/.dlt/config.toml +5 -0
- dlthub_init/scaffolds/minimal_workspace/.dlt/secrets.toml +1 -0
- dlthub_init/scaffolds/minimal_workspace/.gitignore +203 -0
- dlthub_init/scaffolds/minimal_workspace/pyproject.toml +19 -0
- dlthub_init/scaffolds/minimal_workspace/uv.lock +3045 -0
- dlthub_init/skills.py +75 -0
- dlthub_init/strings.py +86 -0
- dlthub_init/uv.py +120 -0
- dlthub_init-0.1.0.dist-info/METADATA +171 -0
- dlthub_init-0.1.0.dist-info/RECORD +28 -0
- dlthub_init-0.1.0.dist-info/WHEEL +4 -0
- dlthub_init-0.1.0.dist-info/entry_points.txt +2 -0
- dlthub_init-0.1.0.dist-info/licenses/LICENSE +92 -0
dlthub_init/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""dlthub-init: scaffold a dltHub workspace into a new or existing directory."""
|
dlthub_init/__main__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: dlthub-router
|
|
3
|
+
description: "The entry point for building anything with dlthub. Use this skill to route the user to the right workflow toolkit and install it on demand. MUST use when the user asks 'what can you do', 'what can I build', 'what are toolkits', 'how do I build a pipeline', 'I want to pull data from a REST API', 'ingest from a SQL database', 'load CSVs from S3', 'make reports / dashboards', 'transform / model my data', 'add data quality checks', 'how do I deploy / schedule a pipeline', 'I'm new to dlthub', 'where do I start', or seems unsure what to do next after setup. Also use whenever the user expresses a data-engineering goal but no matching workflow toolkit is installed yet — this skill installs it on demand. Do NOT use when the toolkit matching the user's intent is already installed — go straight to its entry skill instead; only route/install when the matching toolkit is missing. Do NOT use when a specific task is already in progress (debugging a pipeline, validating data, adding endpoints) and its toolkit is installed. Do NOT use when the user explicitly wants a guided end-to-end demo — use **quick-start** for that."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# dlthub-router
|
|
7
|
+
|
|
8
|
+
Route the user to the right toolkit and skill, then install it. **Fast path first** — the always-loaded toolkit index (in your project rules / `AGENTS.md`) already maps intent → toolkit → install command → entry skill, so you usually do **not** need any discovery round-trip.
|
|
9
|
+
|
|
10
|
+
> **Router vs handovers.** This skill handles **cold start** — picking and installing a toolkit when none relevant is installed. Once inside a workflow, a toolkit's `workflow.md` **handover** sections take over: they carry context forward (pipeline name, dataset, destination) and route to a specific skill. Do **not** use this skill mid-workflow when the relevant toolkit is already installed. But when a handover names a toolkit that **isn't installed yet**, that's your cue — install it via the index below, then follow the handover's entry point + context.
|
|
11
|
+
|
|
12
|
+
## Step 1: Route from the always-loaded index (fast path)
|
|
13
|
+
|
|
14
|
+
The `# toolkits` index is already in your context. Match the user's intent to a row, then:
|
|
15
|
+
|
|
16
|
+
1. **Install** it: `dlthub --non-interactive ai toolkit install <name>`
|
|
17
|
+
2. **Confirm** (Step 3) and **hand over** to that toolkit's entry skill (Step 4).
|
|
18
|
+
|
|
19
|
+
This needs **no MCP call** — the index is authoritative for the shipped toolkits and is the fast path. Use it whenever the intent matches a row.
|
|
20
|
+
|
|
21
|
+
## Step 2: Live discovery (fallback only)
|
|
22
|
+
|
|
23
|
+
Use this **only** when the index has no matching row (an unfamiliar need, or you suspect a newer toolkit exists):
|
|
24
|
+
|
|
25
|
+
- **Prefer MCP** — `list_toolkits` from `dlt-workspace-mcp` for the live catalog, then `toolkit_info <name>` for skill details.
|
|
26
|
+
- **CLI fallback** (MCP not connected): `dlthub --non-interactive ai toolkit list`, then `dlthub --non-interactive ai toolkit info <name>`.
|
|
27
|
+
|
|
28
|
+
Match intent to the best toolkit, then install as in Step 1. Toolkits marked `(installed: <version>)` are already available.
|
|
29
|
+
|
|
30
|
+
## Step 3: Confirm & enable MCP
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
uv run dlthub ai status
|
|
34
|
+
```
|
|
35
|
+
1. You should see the new toolkit and its entry skill.
|
|
36
|
+
2. If you see any **WARNING** about the MCP server (e.g. cannot be started), **fix it** using the error message.
|
|
37
|
+
|
|
38
|
+
## Step 4: Handover (no restart needed)
|
|
39
|
+
|
|
40
|
+
The `dlt-workspace-mcp` server is already running (installed with `init`) and toolkits reuse it — installing one adds **no new MCP server**, so continue in this session. Do **not** ask the user to restart; that would lose the conversation context.
|
|
41
|
+
|
|
42
|
+
1. **Load the new toolkit inline** — prefer `toolkit_info <name>` (MCP), which is agent-agnostic and returns the entry skill + workflow rule. If MCP is unavailable, read the installed files directly; the install path depends on the agent (`.claude/`, `.cursor/`, or `.agents/`) — e.g. `<agent-dir>/skills/<entry-skill>/SKILL.md` and the toolkit's workflow rule.
|
|
43
|
+
2. **Follow that workflow rule and start at the entry skill**, continuing the user's task with the context you already have. Do not start unrelated workflows on your own.
|
|
44
|
+
3. The new skills become natively registered (`/`-invocable, always-loaded workflow rule) on the next natural session start — no need to restart now.
|
|
45
|
+
|
|
46
|
+
> Exception: if a future toolkit ever ships its **own** MCP server (none do today), that server only starts on restart — suggest a restart **only** in that case, and use CLI fallbacks until then.
|
|
47
|
+
|
|
48
|
+
<!-- Loading the new skill/rule inline is a stopgap: until the harness can hot-reload skills/rules after install, newly installed components aren't natively registered until the next session start. Tracked in dlt-hub/dlthub-ai-workbench-internal#72. -->
|
|
49
|
+
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: improve-skills
|
|
3
|
+
description: Improve existing skills based on the current session. Use at the end of a session (or when the user asks) to capture new debugging patterns, data issues, data validation tracks, querying techniques, doc references, or workflow improvements learned during the session. Keeps skills lean and personalized.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Improve skills from session
|
|
7
|
+
|
|
8
|
+
Review the current session and update skills with essential learnings. Skills are the team's institutional memory — keep them lean, specific, and actionable.
|
|
9
|
+
|
|
10
|
+
## Principles
|
|
11
|
+
|
|
12
|
+
- **Read the skill first** — always read the current SKILL.md before proposing changes. Don't duplicate what's already there.
|
|
13
|
+
- **Lean updates only** — add the minimum needed. A single bullet point or a 3-line code snippet is often enough.
|
|
14
|
+
- **Real problems only** — only add things that actually happened in this session or that the user explicitly asked to capture. No hypothetical scenarios.
|
|
15
|
+
- **Doc references matter** — if you found a docs page that was essential to solving a problem, add it as an "Essential Reading" link.
|
|
16
|
+
- **Tools and commands** — if a specific CLI command or MCP tool was key to diagnosing an issue, add it.
|
|
17
|
+
- **Don't restructure** — don't reorganize or rewrite existing skill content. Append to the right section or add a new subsection.
|
|
18
|
+
|
|
19
|
+
## Process
|
|
20
|
+
|
|
21
|
+
### 1. Scan the session for learnings
|
|
22
|
+
|
|
23
|
+
First identify which skills are active in the current toolkit — check the installed skills directory or use the `toolkit_info` MCP tool. Then review the conversation for:
|
|
24
|
+
|
|
25
|
+
**Errors and debugging**:
|
|
26
|
+
- New error types, root causes, and the commands or MCP tools that diagnosed them
|
|
27
|
+
- Config settings that helped (e.g., verbosity, timeouts, flags)
|
|
28
|
+
- Workarounds for API, source, or destination-specific behaviors
|
|
29
|
+
|
|
30
|
+
**Data and schema**:
|
|
31
|
+
- Unexpected data types or coercions needed
|
|
32
|
+
- Schema surprises (nesting, missing columns, naming conventions)
|
|
33
|
+
- Processing patterns that worked
|
|
34
|
+
|
|
35
|
+
**Data access and querying**:
|
|
36
|
+
- Library-specific gotchas (e.g., ibis, dlt dataset API)
|
|
37
|
+
- Useful query patterns or MCP tool calls
|
|
38
|
+
|
|
39
|
+
**Source and pipeline configuration**:
|
|
40
|
+
- Auth, pagination, or rate-limit quirks
|
|
41
|
+
- Config resolution surprises
|
|
42
|
+
- Source/resource parameterization patterns
|
|
43
|
+
|
|
44
|
+
**Workflow**:
|
|
45
|
+
- Missing cross-references between skills
|
|
46
|
+
- New skills that should be in the workflow
|
|
47
|
+
- Steps that are in the wrong order
|
|
48
|
+
|
|
49
|
+
Map each learning to the most relevant skill in the active toolkit.
|
|
50
|
+
|
|
51
|
+
### 2. Read the target skills
|
|
52
|
+
|
|
53
|
+
For each learning, read the relevant SKILL.md. Check:
|
|
54
|
+
- Is this already covered?
|
|
55
|
+
- Where does it fit best? (which section)
|
|
56
|
+
- Would it contradict or duplicate existing content?
|
|
57
|
+
|
|
58
|
+
### 3. Propose changes to the user
|
|
59
|
+
|
|
60
|
+
Present a summary of proposed updates:
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
Proposed skill updates:
|
|
64
|
+
|
|
65
|
+
<skill-name>:
|
|
66
|
+
+ [section] Added: <brief description>
|
|
67
|
+
|
|
68
|
+
<skill-name>:
|
|
69
|
+
(no changes)
|
|
70
|
+
|
|
71
|
+
<skill-name>:
|
|
72
|
+
+ [section] Added: <brief description>
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Get user approval before editing. The user may want to adjust, skip, or add more.
|
|
76
|
+
|
|
77
|
+
### 4. Apply changes
|
|
78
|
+
|
|
79
|
+
Edit the SKILL.md files. For each change:
|
|
80
|
+
- Add to the most relevant existing section
|
|
81
|
+
- If no section fits, add a new subsection at the end (before "Next steps" if present)
|
|
82
|
+
- Include doc links as `**Ref:** <url>` or `**Essential Reading:** <url>`
|
|
83
|
+
- Keep code examples minimal — 3-5 lines max
|
|
84
|
+
|
|
85
|
+
### 5. Update workflow if needed
|
|
86
|
+
|
|
87
|
+
Check if `workflow.md` cross-references are still accurate after skill changes. Update if new skills were added or handoff points changed.
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: setup-secrets
|
|
3
|
+
description: Safely manage dlthub secrets in *.secrets.toml. Use when the user directly asks to set up, configure, or inspect credentials (API keys, database passwords, tokens). Also use when writing Python code that needs to read secrets via dlt.secrets without exposing values. Do NOT use for pipeline creation, source discovery, or debugging pipeline execution — those skills call setup-secrets when they need credentials configured.
|
|
4
|
+
argument-hint: "[source-name]"
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Set up dlthub secrets
|
|
8
|
+
|
|
9
|
+
**Essential Reading** Credentials & config resolution: `https://dlthub.com/docs/general-usage/credentials/setup.md` `https://dlthub.com/docs/general-usage/credentials/advanced`
|
|
10
|
+
|
|
11
|
+
Configure credentials in `.dlt/secrets.toml`. **Never read secrets files directly** — use `dlt-workspace-mcp` tools or `dlthub ai secrets` CLI commands.
|
|
12
|
+
|
|
13
|
+
**Prefer MCP** — use `secrets_list`, `secrets_view_redacted`, `secrets_update_fragment` tools from `dlt-workspace-mcp`.
|
|
14
|
+
|
|
15
|
+
**CLI fallback**: If MCP is not connected, see [cli-reference.md](cli-reference.md) for equivalent `dlthub ai secrets` commands.
|
|
16
|
+
|
|
17
|
+
**Read additional docs as needed:**
|
|
18
|
+
- Connection string credentials (databases, warehouses): `https://dlthub.com/docs/general-usage/credentials/complex_types.md`
|
|
19
|
+
- Built-in credential types (`GcpServiceAccountCredentials`, `AwsCredentials`, etc.): `https://dlthub.com/docs/general-usage/credentials/complex_types.md#built-in-credentials`
|
|
20
|
+
- Destination-specific credentials: `https://dlthub.com/docs/dlt-ecosystem/destinations/`
|
|
21
|
+
|
|
22
|
+
Parse `$ARGUMENTS`:
|
|
23
|
+
- `source_name` or description of what credentials are needed (e.g. "stripe api key", "postgres credentials")
|
|
24
|
+
|
|
25
|
+
## 1. Figure out what to configure
|
|
26
|
+
|
|
27
|
+
If called from another skill, you already know the source, destination, and which fields are needed — skip to step 3.
|
|
28
|
+
|
|
29
|
+
If called standalone (e.g. user says "set up secrets" or hit `ConfigFieldMissingException`):
|
|
30
|
+
- Read the exception message — it tells you the exact field name and TOML path
|
|
31
|
+
- Read the pipeline script to find `dlt.secrets.value` parameters on `@dlt.source`/`@dlt.resource` functions
|
|
32
|
+
- Identify the destination type for required credentials
|
|
33
|
+
|
|
34
|
+
## 2. Find the right secrets file and inspect its shape
|
|
35
|
+
|
|
36
|
+
Use `secrets_list` to list workspace-scoped secrets files. Profile-scoped files (e.g. `.dlt/dev.secrets.toml`) appear first — **use those when present**, fall back to `.dlt/secrets.toml` otherwise.
|
|
37
|
+
|
|
38
|
+
**Pick the target file** from the list — you will pass it as `path` to `secrets_update_fragment` in step 4.
|
|
39
|
+
|
|
40
|
+
Then use `secrets_view_redacted` (no `path` argument) to see the **unified merged** view with values replaced by `***`. To inspect a specific file, pass `path=".dlt/<profile>.secrets.toml"`.
|
|
41
|
+
|
|
42
|
+
Look for:
|
|
43
|
+
- Which sections already exist (`[sources.<name>]`, `[destination.<name>]`)
|
|
44
|
+
- Which fields have real values (shown redacted as `***`) vs placeholders (e.g. `<configure me>`)
|
|
45
|
+
- Whether the layout matches what the pipeline expects
|
|
46
|
+
|
|
47
|
+
Skip this step if you already know the secrets file is empty or doesn't exist.
|
|
48
|
+
|
|
49
|
+
## 3. Research credentials
|
|
50
|
+
|
|
51
|
+
Before asking the user for values:
|
|
52
|
+
- **Web search** the data source for how credentials are obtained (API docs, developer portal)
|
|
53
|
+
- Tell the user exactly what they need and where to get it (e.g. "Go to https://dashboard.stripe.com/apikeys")
|
|
54
|
+
- Explain what each credential field is for
|
|
55
|
+
|
|
56
|
+
## 4. Write secrets
|
|
57
|
+
|
|
58
|
+
Use `secrets_update_fragment` with `fragment` (TOML string) and `path` (target file from step 2). Creates the file if needed, deep-merges without overwriting other sections, returns the redacted result.
|
|
59
|
+
|
|
60
|
+
**CRITICAL: Only write placeholders** — never pass actual secret values through `secrets_update_fragment` or any other tool. The user fills in real values themselves by editing the file directly.
|
|
61
|
+
|
|
62
|
+
### Layout rules
|
|
63
|
+
|
|
64
|
+
**Always** scope secrets under the source or destination name:
|
|
65
|
+
|
|
66
|
+
```toml
|
|
67
|
+
[sources.<source_name>]
|
|
68
|
+
api_key = "<paste-your-api-key-here>"
|
|
69
|
+
|
|
70
|
+
[destination.<destination_name>.credentials]
|
|
71
|
+
host = "localhost"
|
|
72
|
+
port = 5432
|
|
73
|
+
database = "analytics"
|
|
74
|
+
username = "loader"
|
|
75
|
+
password = "<paste-your-password-here>"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
`<source_name>` = `name=` arg on `@dlt.source`, or the function name if not set.
|
|
79
|
+
|
|
80
|
+
### Placeholders
|
|
81
|
+
|
|
82
|
+
Use **meaningful placeholders** that hint at the format:
|
|
83
|
+
- API keys: `"sk-*****-your-key"` or `"ak-xxxx-xxxx-xxxx"`
|
|
84
|
+
- Tokens: `"ghp_xxxxxxxxxxxxxxxxxxxx"` (GitHub), `"xoxb-xxxx"` (Slack)
|
|
85
|
+
- Passwords: `"<paste-your-password-here>"`
|
|
86
|
+
- URLs: `"https://your-instance.example.com"`
|
|
87
|
+
|
|
88
|
+
**Never** use the generic `"<configure me>"`.
|
|
89
|
+
|
|
90
|
+
## 5. Verify
|
|
91
|
+
|
|
92
|
+
Use `secrets_view_redacted` to see the unified merged view across all workspace secret files. Tell the user which fields still have placeholders and how to obtain real values.
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
## 6. Use secrets in Python
|
|
96
|
+
You can write Python scripts that read and use secrets without ever revealing them. `dlt.secrets` and `dlt.config` work as dictionaries using the same TOML paths shown by `view-redacted`.
|
|
97
|
+
|
|
98
|
+
Example: you need to call the GitHub REST API and `view-redacted` shows `[sources.github] api_key = "***"`:
|
|
99
|
+
```py
|
|
100
|
+
import dlt
|
|
101
|
+
import requests
|
|
102
|
+
|
|
103
|
+
# reads from secrets.toml [sources.github] api_key — never prints the value
|
|
104
|
+
api_key = dlt.secrets["sources.github.api_key"]
|
|
105
|
+
resp = requests.get(
|
|
106
|
+
"https://api.github.com/user",
|
|
107
|
+
headers={"Authorization": f"Bearer {api_key}"},
|
|
108
|
+
)
|
|
109
|
+
print(resp.json()["login"])
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
You can also retrieve typed credentials:
|
|
113
|
+
```py
|
|
114
|
+
from dlt.sources.credentials import GcpServiceAccountCredentials
|
|
115
|
+
|
|
116
|
+
creds = dlt.secrets.get("destination.bigquery.credentials", GcpServiceAccountCredentials)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**Reference**: https://dlthub.com/docs/general-usage/credentials/advanced.md#access-configs-and-secrets-in-code
|
|
120
|
+
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# CLI reference for dlthub secrets
|
|
2
|
+
|
|
3
|
+
Use these commands when `dlt-workspace-mcp` is not connected. All MCP tools have CLI equivalents:
|
|
4
|
+
|
|
5
|
+
| MCP tool | CLI equivalent |
|
|
6
|
+
|---|---|
|
|
7
|
+
| `secrets_list` | `dlthub ai secrets list` |
|
|
8
|
+
| `secrets_view_redacted` | `dlthub ai secrets view-redacted [--path <file>]` |
|
|
9
|
+
| `secrets_update_fragment` | `dlthub ai secrets update-fragment --path <file> '<toml>'` |
|
|
10
|
+
|
|
11
|
+
## `secrets list`
|
|
12
|
+
|
|
13
|
+
Lists workspace-scoped secrets files. Profile-scoped files (e.g. `.dlt/dev.secrets.toml`) appear first.
|
|
14
|
+
|
|
15
|
+
```sh
|
|
16
|
+
dlthub ai secrets list
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## `secrets view-redacted`
|
|
20
|
+
|
|
21
|
+
Shows TOML structure with values replaced by `***`. Without `--path`, shows the unified merged view across all workspace secret files.
|
|
22
|
+
|
|
23
|
+
```sh
|
|
24
|
+
dlthub ai secrets view-redacted
|
|
25
|
+
dlthub ai secrets view-redacted --path .dlt/<profile>.secrets.toml
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## `secrets update-fragment`
|
|
29
|
+
|
|
30
|
+
Merges a TOML fragment into a secrets file. Creates the file if needed, deep-merges without overwriting other sections, prints the redacted result. `--path` is required.
|
|
31
|
+
|
|
32
|
+
### Linux / macOS
|
|
33
|
+
|
|
34
|
+
Use multiline single-quoted strings — all common Unix shells (bash, zsh, sh, dash, and also fish, which is not strictly POSIX) pass real newlines inside single quotes:
|
|
35
|
+
|
|
36
|
+
```sh
|
|
37
|
+
dlthub ai secrets update-fragment --path .dlt/secrets.toml '[sources.stripe]
|
|
38
|
+
api_key = "sk-test-xxxxxxxxxxxx"
|
|
39
|
+
'
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
```sh
|
|
43
|
+
dlthub ai secrets update-fragment --path .dlt/secrets.toml '[destination.postgres.credentials]
|
|
44
|
+
host = "localhost"
|
|
45
|
+
port = 5432
|
|
46
|
+
database = "analytics"
|
|
47
|
+
username = "loader"
|
|
48
|
+
password = "<paste-your-password-here>"
|
|
49
|
+
'
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Profile-scoped:
|
|
53
|
+
```sh
|
|
54
|
+
dlthub ai secrets update-fragment --path .dlt/<profile>.secrets.toml '[sources.my_api]
|
|
55
|
+
api_key = "sk-xxxxxxxxxxxx"
|
|
56
|
+
'
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Windows
|
|
60
|
+
|
|
61
|
+
Use `\n` for newlines in a single-line string. The CLI converts literal `\n` to real newlines before parsing:
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
dlthub ai secrets update-fragment --path .dlt/secrets.toml "[sources.stripe]\napi_key = \"sk-test-xxxxxxxxxxxx\""
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
dlthub ai secrets update-fragment --path .dlt/secrets.toml "[destination.postgres.credentials]\nhost = \"localhost\"\nport = 5432\ndatabase = \"analytics\"\nusername = \"loader\"\npassword = \"<paste-your-password-here>\""
|
|
69
|
+
```
|
dlthub_init/cli.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Command-line entrypoint for the `dlthub-init` CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from . import strings
|
|
10
|
+
from .collisions import Flags
|
|
11
|
+
from .config import DEFAULT_SCAFFOLD
|
|
12
|
+
from .display import (
|
|
13
|
+
console,
|
|
14
|
+
err_console,
|
|
15
|
+
print_collision,
|
|
16
|
+
print_header,
|
|
17
|
+
print_next_steps,
|
|
18
|
+
print_summary,
|
|
19
|
+
substep,
|
|
20
|
+
substep_detail,
|
|
21
|
+
)
|
|
22
|
+
from .errors import CollisionError, UvError, WorkspaceError
|
|
23
|
+
from .prompts import confirm
|
|
24
|
+
from .scaffold import apply_scaffold, resolve_target
|
|
25
|
+
from .skills import install_skills
|
|
26
|
+
from .uv import execute_uv_install, find_uv, run_uv_sync
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _ensure_utf8_io_on_windows() -> None:
    """Switch ``sys.stdout`` and ``sys.stderr`` to UTF-8 when on Windows.

    On any other platform this is a no-op. A stream that exposes no
    ``reconfigure`` attribute is left alone, and an ``OSError`` or
    ``ValueError`` raised while reconfiguring is ignored so that start-up
    never fails because of the encoding switch.
    """
    if sys.platform == "win32":
        for stream in (sys.stdout, sys.stderr):
            switch_encoding = getattr(stream, "reconfigure", None)
            if switch_encoding is not None:
                try:
                    switch_encoding(encoding="utf-8")
                except (OSError, ValueError):
                    # Best effort: keep whatever encoding the stream has.
                    pass
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``dlthub-init`` command.

    The parser accepts one optional positional ``project_dir`` (``None``
    when omitted) followed by a set of on/off switches, each of which
    defaults to ``False``.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        prog="dlthub-init",
        description="Scaffold a dltHub workspace into a new or existing directory.",
    )
    parser.add_argument(
        "project_dir",
        nargs="?",
        default=None,
        help="Directory to initialize. Defaults to the current directory.",
    )

    # Every option is a plain ``store_true`` switch. They are registered in
    # this order, which is also the order ``--help`` lists them in.
    switches = (
        # ``--yes`` is deliberately hidden from the help output.
        (("--yes", "-y"), argparse.SUPPRESS),
        (("--verbose", "-v"), "Stream output from uv."),
        (
            ("--no-sync",),
            "Scaffold files only; do not create a virtual environment or install dependencies.",
        ),
        (("--force",), "Overwrite existing generated files (never secrets)."),
        (
            ("--merge",),
            "Append missing entries to an existing .gitignore instead of skipping it.",
        ),
        (("--no-pyproject",), "Skip pyproject.toml."),
        (("--no-gitignore",), "Skip .gitignore."),
    )
    for option_names, help_text in switches:
        parser.add_argument(*option_names, action="store_true", help=help_text)

    return parser
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, execute :func:`run`, and return an exit code.

    Args:
        argv: Arguments to parse; ``None`` lets ``argparse`` read them from
            ``sys.argv``.

    Returns:
        ``0`` when :func:`run` completes, ``130`` after a
        ``KeyboardInterrupt``, ``2`` after a ``CollisionError``, and ``1``
        after a ``WorkspaceError`` or any other ``Exception``.
    """
    _ensure_utf8_io_on_windows()
    args = build_parser().parse_args(argv)

    exit_code = 0
    try:
        run(args)
    except KeyboardInterrupt:
        console.print(strings.MSG_CANCELLED)
        exit_code = 130
    except CollisionError as collision:
        # Handlers are tried in order, so this one takes precedence over the
        # WorkspaceError handler below.
        print_collision(collision.conflicts)
        exit_code = 2
    except WorkspaceError as failure:
        console.print(strings.MSG_ERROR_PREFIX.format(message=failure))
        exit_code = 1
    except Exception as unexpected:
        console.print(strings.MSG_UNEXPECTED_ERROR.format(message=unexpected))
        if not args.verbose:
            console.print(strings.MSG_UNEXPECTED_ERROR_HINT)
        else:
            # The traceback is only shown when --verbose was given.
            console.print_exception()
        exit_code = 1
    return exit_code
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def run(args: argparse.Namespace) -> None:
    """Execute one full ``dlthub-init`` invocation.

    Resolves the target directory, applies the default scaffold, installs
    the skills, prints the summary, optionally syncs dependencies through
    :func:`_maybe_sync`, and finally prints the next steps.

    Args:
        args: Parsed command-line options as produced by
            :func:`build_parser`.
    """
    if args.yes:
        err_console.print(strings.MSG_TESTING_SHORTCUT_NOTE)

    write_flags = Flags(
        force=args.force,
        merge=args.merge,
        no_pyproject=args.no_pyproject,
        no_gitignore=args.no_gitignore,
    )

    project_dir = resolve_target(args.project_dir)
    print_header(project_dir)

    plan = apply_scaffold(project_dir, scaffold=DEFAULT_SCAFFOLD, flags=write_flags)
    installed_skills = install_skills(project_dir)
    print_summary(plan)
    if installed_skills:
        skill_note = strings.MSG_SKILLS_INSTALLED.format(count=len(installed_skills))
        substep_detail(skill_note)

    synced = _maybe_sync(project_dir, args, verbose=args.verbose)
    # uv is looked up again after the sync step has had its chance to run.
    uv_available = find_uv() is not None
    print_next_steps(project_dir, synced=synced, uv_installed=uv_available)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _maybe_sync(project_dir: Path, args: argparse.Namespace, *, verbose: bool) -> bool:
    """Run ``uv sync`` for the project unless it is disabled or declined.

    Args:
        project_dir: Directory passed on to ``run_uv_sync``.
        args: Parsed options; ``no_sync`` and ``yes`` are read here.
        verbose: Forwarded to the uv helpers and to the progress sub-step.

    Returns:
        ``True`` when ``run_uv_sync`` finished without raising ``UvError``.
        ``False`` when ``--no-sync`` was given, a prompt was declined, or a
        ``UvError`` was raised.
    """
    if args.no_sync:
        console.print(strings.MSG_SKIPPED_SYNC)
        return False

    assume_yes = args.yes
    uv_path = find_uv()

    # Ask about installing uv only when it is missing and --yes was not given.
    if uv_path is None and not assume_yes and not confirm(strings.PROMPT_INSTALL_UV, default=True):
        console.print(strings.MSG_SKIPPED_SYNC)
        return False

    if not (assume_yes or confirm(strings.PROMPT_CREATE_VENV, default=True)):
        console.print(strings.MSG_SKIPPED_SYNC)
        return False

    try:
        if uv_path is None:
            # uv is installed only after both prompts have been accepted.
            uv_path = execute_uv_install(verbose=verbose)
        with substep(strings.MSG_INSTALLING_DEPS, strings.MSG_INSTALLED_DEPS, verbose=verbose):
            run_uv_sync(uv_path, project_dir, verbose=verbose)
    except UvError as uv_failure:
        console.print(strings.MSG_SYNC_FAILED.format(message=uv_failure))
        return False

    return True
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# Running the module as a script exits with the code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Non-destructive write planning for `dlthub-init`."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
# Scaffold-relative paths that the planner treats specially.
PYPROJECT = Path("pyproject.toml")
UV_LOCK = Path("uv.lock")
GITIGNORE = Path(".gitignore")
WORKSPACE_MARKER = Path(".dlt", ".workspace")
# Existing files in this set are always classified as SKIP, even with force.
SECRET_FILES = frozenset([Path(".dlt", "secrets.toml")])
# Existing files in this set are classified as MERGE when the merge flag is set.
MERGEABLE_FILES = frozenset([GITIGNORE])
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Outcome(Enum):
    """Classification that the planner assigns to one scaffold path."""

    # Target does not exist yet.
    CREATE = "create"
    # Target exists and the force flag is set.
    OVERWRITE = "overwrite"
    # Target is a mergeable file that exists and the merge flag is set.
    MERGE = "merge"
    # Target exists and no flag selects a different outcome.
    SKIP = "skip"
    # Path was switched off by one of the ``no_*`` flags.
    DISABLED = "disabled"
    # Workspace marker already exists and the force flag is not set.
    CONFLICT = "conflict"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class Flags:
    """Immutable switches that steer how existing files are classified.

    Every switch defaults to ``False``.
    """

    # Existing non-secret, non-mergeable files are classified as OVERWRITE.
    force: bool = False
    # Existing mergeable files are classified as MERGE rather than SKIP.
    merge: bool = False
    # ``pyproject.toml`` is classified as DISABLED.
    no_pyproject: bool = False
    # ``.gitignore`` is classified as DISABLED.
    no_gitignore: bool = False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class PlannedPath:
    """One entry of a write plan: a path paired with its classification."""

    # Path relative to the project directory.
    relative: Path
    # Classification assigned to that path by the planner.
    outcome: Outcome
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def build_plan(relatives: list[Path], project_dir: Path, flags: Flags) -> list[PlannedPath]:
    """Classify every scaffold path and return the plan sorted by path.

    Args:
        relatives: Scaffold paths relative to the project directory.
        project_dir: Directory the paths are checked against.
        flags: Switches that influence the classification.

    Returns:
        One ``PlannedPath`` per remaining path, ordered by relative path.
        The ``uv.lock`` entry may be rewritten or dropped by
        ``_couple_lock_to_pyproject``.
    """
    outcomes: dict[Path, Outcome] = {}
    for relative in relatives:
        outcomes[relative] = _classify(relative, project_dir, flags)

    # Adjusts or removes the uv.lock entry in place.
    _couple_lock_to_pyproject(outcomes, project_dir)

    ordered = sorted(outcomes)
    return [PlannedPath(relative, outcomes[relative]) for relative in ordered]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def conflicts(plan: list[PlannedPath]) -> list[str]:
    """Return the relative paths, as strings, of all CONFLICT plan entries.

    The order of ``plan`` is preserved; the list is empty when nothing
    conflicts.
    """
    blocked: list[str] = []
    for entry in plan:
        if entry.outcome is Outcome.CONFLICT:
            blocked.append(str(entry.relative))
    return blocked
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _classify(rel: Path, project_dir: Path, flags: Flags) -> Outcome:
    """Decide the outcome for a single scaffold path.

    The checks run in a fixed order: disabled by flag, target missing,
    secret file, mergeable file, force flag, workspace marker, and finally
    the default of SKIP.

    Args:
        rel: Path relative to ``project_dir``.
        project_dir: Directory in which the target is looked up.
        flags: Switches that influence the classification.

    Returns:
        The ``Outcome`` for ``rel``.
    """
    if _disabled_by_flag(rel, flags):
        return Outcome.DISABLED

    target = project_dir / rel
    if not target.exists():
        return Outcome.CREATE

    # From here on the target already exists.
    if rel in SECRET_FILES:
        # Secrets are checked before the force flag, so force never applies.
        return Outcome.SKIP
    if rel in MERGEABLE_FILES:
        if flags.merge:
            return Outcome.MERGE
        return Outcome.SKIP
    if flags.force:
        return Outcome.OVERWRITE

    # Without force, an existing workspace marker is reported as a conflict.
    return Outcome.CONFLICT if rel == WORKSPACE_MARKER else Outcome.SKIP
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _couple_lock_to_pyproject(outcomes: dict[Path, Outcome], project_dir: Path) -> None:
    """Tie the ``uv.lock`` outcome to the ``pyproject.toml`` outcome, in place.

    The bundled lock file only matches the bundled pyproject, so it is
    written only when the pyproject is written too.

    Args:
        outcomes: Mapping of relative path to outcome; mutated in place.
        project_dir: Directory in which an existing ``uv.lock`` is looked up.
    """
    if UV_LOCK not in outcomes:
        return

    pyproject_written = outcomes.get(PYPROJECT) in (Outcome.CREATE, Outcome.OVERWRITE)
    lock_exists = (project_dir / UV_LOCK).exists()

    if pyproject_written:
        outcomes[UV_LOCK] = Outcome.OVERWRITE if lock_exists else Outcome.CREATE
    elif lock_exists:
        outcomes[UV_LOCK] = Outcome.SKIP
    else:
        # No pyproject is written and no lock exists: drop the entry.
        del outcomes[UV_LOCK]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _disabled_by_flag(rel: Path, flags: Flags) -> bool:
    """Report whether ``rel`` has been switched off by a ``no_*`` flag.

    Only ``pyproject.toml`` and ``.gitignore`` can be disabled; every other
    path yields ``False``.
    """
    switch_by_path = {
        PYPROJECT: flags.no_pyproject,
        GITIGNORE: flags.no_gitignore,
    }
    return switch_by_path.get(rel, False)
|