khora 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khora-0.0.1/.cursor/environment.json +12 -0
- khora-0.0.1/.cursor/rules/pre-commit.mdc +10 -0
- khora-0.0.1/.cursor/rules/run-tests.mdc +10 -0
- khora-0.0.1/.env.example +19 -0
- khora-0.0.1/.github/workflows/ci.yml +74 -0
- khora-0.0.1/.github/workflows/publish.yml +51 -0
- khora-0.0.1/.gitignore +207 -0
- khora-0.0.1/.pre-commit-config.yaml +41 -0
- khora-0.0.1/CONTRIBUTING.md +168 -0
- khora-0.0.1/Dockerfile +85 -0
- khora-0.0.1/LICENSE +21 -0
- khora-0.0.1/PKG-INFO +309 -0
- khora-0.0.1/README.md +261 -0
- khora-0.0.1/examples/example_pipeline.py +140 -0
- khora-0.0.1/examples/playwright_scraping.py +154 -0
- khora-0.0.1/pyproject.toml +139 -0
- khora-0.0.1/scripts/bump_version.py +130 -0
- khora-0.0.1/scripts/create_release.py +128 -0
- khora-0.0.1/setup.sh +61 -0
- khora-0.0.1/src/khora/__init__.py +6 -0
- khora-0.0.1/src/khora/__main__.py +101 -0
- khora-0.0.1/src/khora/agents/__init__.py +6 -0
- khora-0.0.1/src/khora/agents/data_fetcher.py +158 -0
- khora-0.0.1/src/khora/agents/pipeline_builder.py +217 -0
- khora-0.0.1/src/khora/pipelines/__init__.py +6 -0
- khora-0.0.1/src/khora/pipelines/data_pipeline.py +131 -0
- khora-0.0.1/src/khora/pipelines/definitions.py +14 -0
- khora-0.0.1/src/khora/tools/__init__.py +7 -0
- khora-0.0.1/src/khora/tools/api_tool.py +81 -0
- khora-0.0.1/src/khora/tools/google_docs_tool.py +169 -0
- khora-0.0.1/src/khora/tools/web_scraper_tool.py +197 -0
- khora-0.0.1/src/khora/utils/__init__.py +6 -0
- khora-0.0.1/src/khora/utils/config.py +54 -0
- khora-0.0.1/src/khora/utils/data_models.py +57 -0
- khora-0.0.1/tests/__init__.py +1 -0
- khora-0.0.1/tests/integration/__init__.py +1 -0
- khora-0.0.1/tests/unit/__init__.py +1 -0
- khora-0.0.1/tests/unit/test_config.py +64 -0
- khora-0.0.1/tests/unit/test_data_models.py +89 -0
- khora-0.0.1/tests/unit/test_tools.py +159 -0
- khora-0.0.1/uv.lock +2425 -0
khora-0.0.1/.env.example
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# OpenAI Configuration
|
2
|
+
OPENAI_API_KEY=your_openai_api_key_here
|
3
|
+
OPENAI_MODEL=gpt-4-turbo-preview
|
4
|
+
|
5
|
+
# Google Configuration (for Google Docs/Sheets access)
|
6
|
+
GOOGLE_CREDENTIALS_PATH=/path/to/google-credentials.json
|
7
|
+
|
8
|
+
# Dagster Configuration
|
9
|
+
DAGSTER_HOME=/tmp/dagster
|
10
|
+
DAGSTER_PG_HOST=localhost
|
11
|
+
DAGSTER_PG_PORT=5432
|
12
|
+
DAGSTER_PG_DB=dagster
|
13
|
+
DAGSTER_PG_USER=dagster
|
14
|
+
DAGSTER_PG_PASSWORD=
|
15
|
+
|
16
|
+
# General Configuration
|
17
|
+
LOG_LEVEL=INFO
|
18
|
+
CACHE_ENABLED=true
|
19
|
+
CACHE_TTL=3600
|
@@ -0,0 +1,74 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ main, develop ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ main ]
|
8
|
+
|
9
|
+
env:
|
10
|
+
PYTHON_VERSION: "3.12"
|
11
|
+
|
12
|
+
jobs:
|
13
|
+
test:
|
14
|
+
runs-on: ubuntu-latest
|
15
|
+
|
16
|
+
steps:
|
17
|
+
- uses: actions/checkout@v4
|
18
|
+
|
19
|
+
- name: Install uv
|
20
|
+
uses: astral-sh/setup-uv@v3
|
21
|
+
with:
|
22
|
+
version: "latest"
|
23
|
+
|
24
|
+
- name: Set up Python 3.12
|
25
|
+
uses: actions/setup-python@v5
|
26
|
+
with:
|
27
|
+
python-version: "3.12"
|
28
|
+
|
29
|
+
- name: Install dependencies
|
30
|
+
run: |
|
31
|
+
uv pip install --system -e ".[dev]"
|
32
|
+
|
33
|
+
- name: Install Playwright browsers
|
34
|
+
run: |
|
35
|
+
playwright install chromium
|
36
|
+
playwright install-deps
|
37
|
+
|
38
|
+
- name: Run linting
|
39
|
+
run: |
|
40
|
+
black --check src tests
|
41
|
+
isort --check-only src tests
|
42
|
+
ruff check src tests
|
43
|
+
|
44
|
+
- name: Run type checking
|
45
|
+
run: |
|
46
|
+
mypy src
|
47
|
+
|
48
|
+
- name: Run tests
|
49
|
+
run: |
|
50
|
+
pytest tests/unit -v
|
51
|
+
|
52
|
+
build-docker:
|
53
|
+
runs-on: ubuntu-latest
|
54
|
+
needs: test
|
55
|
+
|
56
|
+
steps:
|
57
|
+
- uses: actions/checkout@v4
|
58
|
+
|
59
|
+
- name: Set up Docker Buildx
|
60
|
+
uses: docker/setup-buildx-action@v3
|
61
|
+
|
62
|
+
- name: Build Docker image
|
63
|
+
uses: docker/build-push-action@v5
|
64
|
+
with:
|
65
|
+
context: .
|
66
|
+
load: true
|
67
|
+
push: false
|
68
|
+
tags: khora:latest
|
69
|
+
cache-from: type=gha
|
70
|
+
cache-to: type=gha,mode=max
|
71
|
+
|
72
|
+
- name: Run Docker image tests
|
73
|
+
run: |
|
74
|
+
docker run --rm khora:latest python -c "import khora; print(khora.__version__)"
|
@@ -0,0 +1,51 @@
|
|
1
|
+
name: publish
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
tags:
|
6
|
+
- 'v*'
|
7
|
+
|
8
|
+
jobs:
|
9
|
+
publish-tag:
|
10
|
+
name: Build and publish Python distributions to PyPI
|
11
|
+
runs-on: ubuntu-latest
|
12
|
+
steps:
|
13
|
+
- name: Checkout source
|
14
|
+
uses: actions/checkout@v4
|
15
|
+
|
16
|
+
- name: Set up Python
|
17
|
+
uses: actions/setup-python@v5
|
18
|
+
with:
|
19
|
+
python-version: "3.12"
|
20
|
+
|
21
|
+
- name: Install uv
|
22
|
+
uses: astral-sh/setup-uv@v3
|
23
|
+
with:
|
24
|
+
version: "latest"
|
25
|
+
|
26
|
+
- name: Install Playwright browsers
|
27
|
+
run: |
|
28
|
+
uv pip install --system playwright
|
29
|
+
playwright install chromium
|
30
|
+
|
31
|
+
- name: Install dependencies and run tests
|
32
|
+
run: |
|
33
|
+
uv pip install --system -e ".[dev]"
|
34
|
+
pytest tests/unit -v
|
35
|
+
|
36
|
+
- name: Run code quality checks
|
37
|
+
run: |
|
38
|
+
black --check src tests
|
39
|
+
ruff check src tests
|
40
|
+
|
41
|
+
- name: Build source and wheel distributions
|
42
|
+
run: |
|
43
|
+
python -m pip install --upgrade build twine
|
44
|
+
python -m build
|
45
|
+
twine check dist/*
|
46
|
+
|
47
|
+
- name: Publish distribution to PyPI
|
48
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
49
|
+
with:
|
50
|
+
user: __token__
|
51
|
+
password: ${{ secrets.PYPI_API_TOKEN }}
|
khora-0.0.1/.gitignore
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
2
|
+
__pycache__/
|
3
|
+
*.py[codz]
|
4
|
+
*$py.class
|
5
|
+
|
6
|
+
# C extensions
|
7
|
+
*.so
|
8
|
+
|
9
|
+
# Distribution / packaging
|
10
|
+
.Python
|
11
|
+
build/
|
12
|
+
develop-eggs/
|
13
|
+
dist/
|
14
|
+
downloads/
|
15
|
+
eggs/
|
16
|
+
.eggs/
|
17
|
+
lib/
|
18
|
+
lib64/
|
19
|
+
parts/
|
20
|
+
sdist/
|
21
|
+
var/
|
22
|
+
wheels/
|
23
|
+
share/python-wheels/
|
24
|
+
*.egg-info/
|
25
|
+
.installed.cfg
|
26
|
+
*.egg
|
27
|
+
MANIFEST
|
28
|
+
|
29
|
+
# PyInstaller
|
30
|
+
# Usually these files are written by a python script from a template
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32
|
+
*.manifest
|
33
|
+
*.spec
|
34
|
+
|
35
|
+
# Installer logs
|
36
|
+
pip-log.txt
|
37
|
+
pip-delete-this-directory.txt
|
38
|
+
|
39
|
+
# Unit test / coverage reports
|
40
|
+
htmlcov/
|
41
|
+
.tox/
|
42
|
+
.nox/
|
43
|
+
.coverage
|
44
|
+
.coverage.*
|
45
|
+
.cache
|
46
|
+
nosetests.xml
|
47
|
+
coverage.xml
|
48
|
+
*.cover
|
49
|
+
*.py.cover
|
50
|
+
.hypothesis/
|
51
|
+
.pytest_cache/
|
52
|
+
cover/
|
53
|
+
|
54
|
+
# Translations
|
55
|
+
*.mo
|
56
|
+
*.pot
|
57
|
+
|
58
|
+
# Django stuff:
|
59
|
+
*.log
|
60
|
+
local_settings.py
|
61
|
+
db.sqlite3
|
62
|
+
db.sqlite3-journal
|
63
|
+
|
64
|
+
# Flask stuff:
|
65
|
+
instance/
|
66
|
+
.webassets-cache
|
67
|
+
|
68
|
+
# Scrapy stuff:
|
69
|
+
.scrapy
|
70
|
+
|
71
|
+
# Sphinx documentation
|
72
|
+
docs/_build/
|
73
|
+
|
74
|
+
# PyBuilder
|
75
|
+
.pybuilder/
|
76
|
+
target/
|
77
|
+
|
78
|
+
# Jupyter Notebook
|
79
|
+
.ipynb_checkpoints
|
80
|
+
|
81
|
+
# IPython
|
82
|
+
profile_default/
|
83
|
+
ipython_config.py
|
84
|
+
|
85
|
+
# pyenv
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
88
|
+
# .python-version
|
89
|
+
|
90
|
+
# pipenv
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94
|
+
# install all needed dependencies.
|
95
|
+
#Pipfile.lock
|
96
|
+
|
97
|
+
# UV
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100
|
+
# commonly ignored for libraries.
|
101
|
+
#uv.lock
|
102
|
+
|
103
|
+
# poetry
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
106
|
+
# commonly ignored for libraries.
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
108
|
+
#poetry.lock
|
109
|
+
#poetry.toml
|
110
|
+
|
111
|
+
# pdm
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
115
|
+
#pdm.lock
|
116
|
+
#pdm.toml
|
117
|
+
.pdm-python
|
118
|
+
.pdm-build/
|
119
|
+
|
120
|
+
# pixi
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
122
|
+
#pixi.lock
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
125
|
+
.pixi
|
126
|
+
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
128
|
+
__pypackages__/
|
129
|
+
|
130
|
+
# Celery stuff
|
131
|
+
celerybeat-schedule
|
132
|
+
celerybeat.pid
|
133
|
+
|
134
|
+
# SageMath parsed files
|
135
|
+
*.sage.py
|
136
|
+
|
137
|
+
# Environments
|
138
|
+
.env
|
139
|
+
.envrc
|
140
|
+
.venv
|
141
|
+
env/
|
142
|
+
venv/
|
143
|
+
ENV/
|
144
|
+
env.bak/
|
145
|
+
venv.bak/
|
146
|
+
|
147
|
+
# Spyder project settings
|
148
|
+
.spyderproject
|
149
|
+
.spyproject
|
150
|
+
|
151
|
+
# Rope project settings
|
152
|
+
.ropeproject
|
153
|
+
|
154
|
+
# mkdocs documentation
|
155
|
+
/site
|
156
|
+
|
157
|
+
# mypy
|
158
|
+
.mypy_cache/
|
159
|
+
.dmypy.json
|
160
|
+
dmypy.json
|
161
|
+
|
162
|
+
# Pyre type checker
|
163
|
+
.pyre/
|
164
|
+
|
165
|
+
# pytype static type analyzer
|
166
|
+
.pytype/
|
167
|
+
|
168
|
+
# Cython debug symbols
|
169
|
+
cython_debug/
|
170
|
+
|
171
|
+
# PyCharm
|
172
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
173
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
174
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
175
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
176
|
+
#.idea/
|
177
|
+
|
178
|
+
# Abstra
|
179
|
+
# Abstra is an AI-powered process automation framework.
|
180
|
+
# Ignore directories containing user credentials, local state, and settings.
|
181
|
+
# Learn more at https://abstra.io/docs
|
182
|
+
.abstra/
|
183
|
+
|
184
|
+
# Visual Studio Code
|
185
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
186
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
187
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
188
|
+
# you could uncomment the following to ignore the entire vscode folder
|
189
|
+
# .vscode/
|
190
|
+
|
191
|
+
# Ruff stuff:
|
192
|
+
.ruff_cache/
|
193
|
+
|
194
|
+
# PyPI configuration file
|
195
|
+
.pypirc
|
196
|
+
|
197
|
+
# Cursor
|
198
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
199
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
200
|
+
# For details, see https://docs.cursor.com/context/ignore-files
|
201
|
+
.cursorignore
|
202
|
+
.cursorindexingignore
|
203
|
+
|
204
|
+
# Marimo
|
205
|
+
marimo/_static/
|
206
|
+
marimo/_lsp/
|
207
|
+
__marimo__/
|
@@ -0,0 +1,41 @@
|
|
1
|
+
repos:
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
3
|
+
rev: v4.6.0
|
4
|
+
hooks:
|
5
|
+
- id: trailing-whitespace
|
6
|
+
- id: end-of-file-fixer
|
7
|
+
- id: check-yaml
|
8
|
+
- id: check-added-large-files
|
9
|
+
args: ['--maxkb=1000']
|
10
|
+
- id: check-merge-conflict
|
11
|
+
- id: check-toml
|
12
|
+
|
13
|
+
- repo: https://github.com/psf/black
|
14
|
+
rev: 24.4.2
|
15
|
+
hooks:
|
16
|
+
- id: black
|
17
|
+
language_version: python3.12
|
18
|
+
|
19
|
+
- repo: https://github.com/pycqa/isort
|
20
|
+
rev: 5.13.2
|
21
|
+
hooks:
|
22
|
+
- id: isort
|
23
|
+
name: isort
|
24
|
+
|
25
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
26
|
+
rev: v0.5.1
|
27
|
+
hooks:
|
28
|
+
- id: ruff
|
29
|
+
args: [--fix, --exit-non-zero-on-fix]
|
30
|
+
|
31
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
32
|
+
rev: v1.10.1
|
33
|
+
hooks:
|
34
|
+
- id: mypy
|
35
|
+
additional_dependencies:
|
36
|
+
- types-requests
|
37
|
+
- types-pyyaml
|
38
|
+
- types-python-dateutil
|
39
|
+
- pydantic>=2.7.0
|
40
|
+
args: [--config-file=pyproject.toml, src]
|
41
|
+
pass_filenames: false
|
@@ -0,0 +1,168 @@
|
|
1
|
+
# Contributing to Khora
|
2
|
+
|
3
|
+
Thank you for your interest in contributing to Khora! This guide will help you get started.
|
4
|
+
|
5
|
+
## Development Setup
|
6
|
+
|
7
|
+
1. **Clone the repository**:
|
8
|
+
```bash
|
9
|
+
git clone https://github.com/yourusername/khora.git
|
10
|
+
cd khora
|
11
|
+
```
|
12
|
+
|
13
|
+
2. **Run the setup script**:
|
14
|
+
```bash
|
15
|
+
./setup.sh
|
16
|
+
```
|
17
|
+
|
18
|
+
3. **Activate the virtual environment**:
|
19
|
+
```bash
|
20
|
+
source .venv/bin/activate
|
21
|
+
```
|
22
|
+
|
23
|
+
## Development Workflow
|
24
|
+
|
25
|
+
1. **Create a feature branch**:
|
26
|
+
```bash
|
27
|
+
git checkout -b feature/your-feature-name
|
28
|
+
```
|
29
|
+
|
30
|
+
2. **Make your changes**:
|
31
|
+
- Write code following the project style
|
32
|
+
- Add tests for new functionality
|
33
|
+
- Update documentation as needed
|
34
|
+
|
35
|
+
3. **Run tests and checks**:
|
36
|
+
```bash
|
37
|
+
# Run tests
|
38
|
+
pytest tests/
|
39
|
+
|
40
|
+
# Format code
|
41
|
+
black src tests
|
42
|
+
ruff format src tests
|
43
|
+
|
44
|
+
# Check linting
|
45
|
+
ruff check src tests
|
46
|
+
|
47
|
+
# Type checking
|
48
|
+
mypy src
|
49
|
+
```
|
50
|
+
|
51
|
+
4. **Commit and push**:
|
52
|
+
```bash
|
53
|
+
git add .
|
54
|
+
git commit -m "Add your feature description"
|
55
|
+
git push origin feature/your-feature-name
|
56
|
+
```
|
57
|
+
|
58
|
+
5. **Create a Pull Request**:
|
59
|
+
- Open a PR against the `main` branch
|
60
|
+
- Provide a clear description of your changes
|
61
|
+
- Wait for review and CI checks to pass
|
62
|
+
|
63
|
+
## Code Style
|
64
|
+
|
65
|
+
- Use Black for code formatting (line length: 88)
|
66
|
+
- Follow Ruff linting rules
|
67
|
+
- Use type hints where appropriate
|
68
|
+
- Write docstrings for functions and classes
|
69
|
+
- Keep functions focused and concise
|
70
|
+
|
71
|
+
## Testing
|
72
|
+
|
73
|
+
- Write unit tests for new functionality
|
74
|
+
- Place tests in the appropriate `tests/` subdirectory
|
75
|
+
- Use descriptive test names
|
76
|
+
- Mock external dependencies
|
77
|
+
- Aim for good test coverage
|
78
|
+
|
79
|
+
## Releasing (Maintainers Only)
|
80
|
+
|
81
|
+
### PyPI Setup for Publishing
|
82
|
+
|
83
|
+
The project uses PyPI API tokens for publishing. To set this up:
|
84
|
+
|
85
|
+
1. **Create PyPI API Token**:
|
86
|
+
- Go to [PyPI account settings](https://pypi.org/manage/account/token/)
|
87
|
+
- Create a new API token with upload permissions
|
88
|
+
- Copy the token (starts with `pypi-`)
|
89
|
+
|
90
|
+
2. **Add GitHub Secret**:
|
91
|
+
- Go to repository Settings > Secrets and variables > Actions
|
92
|
+
- Create a new secret named `PYPI_API_TOKEN`
|
93
|
+
- Paste your PyPI API token as the value
|
94
|
+
|
95
|
+
### Creating a Release
|
96
|
+
|
97
|
+
Use the automated release script:
|
98
|
+
|
99
|
+
```bash
|
100
|
+
# Create a complete release, including tests and git operations
|
101
|
+
python scripts/create_release.py patch
|
102
|
+
|
103
|
+
# Or with automatic push
|
104
|
+
python scripts/create_release.py patch --push
|
105
|
+
|
106
|
+
# Preview changes first
|
107
|
+
python scripts/create_release.py minor --dry-run
|
108
|
+
```
|
109
|
+
|
110
|
+
The publish workflow will automatically:
|
111
|
+
1. Trigger when you push a version tag (e.g., `v0.0.2`)
|
112
|
+
2. Run the unit tests and code quality checks (Black and Ruff)
|
113
|
+
3. Build the package
|
114
|
+
4. Publish to PyPI if all checks pass
|
115
|
+
|
116
|
+
### Version Numbering
|
117
|
+
|
118
|
+
Follow [Semantic Versioning](https://semver.org/):
|
119
|
+
- **MAJOR**: Breaking changes
|
120
|
+
- **MINOR**: New features (backwards compatible)
|
121
|
+
- **PATCH**: Bug fixes (backwards compatible)
|
122
|
+
|
123
|
+
## Project Structure
|
124
|
+
|
125
|
+
```
|
126
|
+
khora/
|
127
|
+
├── src/khora/ # Main package
|
128
|
+
│ ├── agents/ # AI agents
|
129
|
+
│ ├── pipelines/ # Dagster pipelines
|
130
|
+
│ ├── tools/ # Data source tools
|
131
|
+
│ └── utils/ # Utilities
|
132
|
+
├── tests/ # Test suite
|
133
|
+
│ ├── unit/ # Unit tests
|
134
|
+
│ └── integration/ # Integration tests
|
135
|
+
├── scripts/ # Development scripts
|
136
|
+
├── examples/ # Usage examples
|
137
|
+
└── .github/workflows/ # CI/CD
|
138
|
+
```
|
139
|
+
|
140
|
+
## Adding New Data Sources
|
141
|
+
|
142
|
+
To add a new data source:
|
143
|
+
|
144
|
+
1. **Create a tool** in `src/khora/tools/`:
|
145
|
+
- Inherit from `BaseTool`
|
146
|
+
- Implement `_run()` and `_arun()` methods
|
147
|
+
- Add proper error handling
|
148
|
+
|
149
|
+
2. **Add to enum** in `src/khora/utils/data_models.py`:
|
150
|
+
- Add new source type to `DataSourceType`
|
151
|
+
|
152
|
+
3. **Update agent** in `src/khora/agents/data_fetcher.py`:
|
153
|
+
- Add tool to the `tools` dictionary
|
154
|
+
|
155
|
+
4. **Write tests**:
|
156
|
+
- Add unit tests in `tests/unit/test_tools.py`
|
157
|
+
- Mock external dependencies
|
158
|
+
|
159
|
+
5. **Add examples**:
|
160
|
+
- Create usage examples in `examples/`
|
161
|
+
|
162
|
+
## Getting Help
|
163
|
+
|
164
|
+
- Open an issue for bugs or feature requests
|
165
|
+
- Join discussions for questions
|
166
|
+
- Check existing issues before creating new ones
|
167
|
+
|
168
|
+
Thank you for contributing! 🚀
|
khora-0.0.1/Dockerfile
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
# Build stage
|
2
|
+
FROM python:3.12-slim AS builder
|
3
|
+
|
4
|
+
# Install system dependencies
|
5
|
+
RUN apt-get update && apt-get install -y \
|
6
|
+
gcc \
|
7
|
+
g++ \
|
8
|
+
&& rm -rf /var/lib/apt/lists/*
|
9
|
+
|
10
|
+
# Install uv
|
11
|
+
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
12
|
+
|
13
|
+
# Set working directory
|
14
|
+
WORKDIR /app
|
15
|
+
|
16
|
+
# Copy project files
|
17
|
+
COPY pyproject.toml .
|
18
|
+
COPY README.md .
|
19
|
+
COPY src/ src/
|
20
|
+
|
21
|
+
# Install the package and its dependencies
|
22
|
+
RUN uv pip install --system --no-cache .
|
23
|
+
|
24
|
+
# Runtime stage
|
25
|
+
FROM python:3.12-slim
|
26
|
+
|
27
|
+
# Install runtime dependencies and Playwright dependencies
|
28
|
+
RUN apt-get update && apt-get install -y \
|
29
|
+
libgomp1 \
|
30
|
+
# Playwright dependencies
|
31
|
+
libnss3 \
|
32
|
+
libnspr4 \
|
33
|
+
libatk1.0-0 \
|
34
|
+
libatk-bridge2.0-0 \
|
35
|
+
libcups2 \
|
36
|
+
libdrm2 \
|
37
|
+
libdbus-1-3 \
|
38
|
+
libatspi2.0-0 \
|
39
|
+
libx11-6 \
|
40
|
+
libxcomposite1 \
|
41
|
+
libxdamage1 \
|
42
|
+
libxext6 \
|
43
|
+
libxfixes3 \
|
44
|
+
libxrandr2 \
|
45
|
+
libgbm1 \
|
46
|
+
libxcb1 \
|
47
|
+
libxkbcommon0 \
|
48
|
+
libpango-1.0-0 \
|
49
|
+
libcairo2 \
|
50
|
+
libasound2 \
|
51
|
+
&& rm -rf /var/lib/apt/lists/*
|
52
|
+
|
53
|
+
# Create non-root user
|
54
|
+
RUN useradd -m -u 1000 khora
|
55
|
+
|
56
|
+
# Set working directory
|
57
|
+
WORKDIR /app
|
58
|
+
|
59
|
+
# Copy from builder
|
60
|
+
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
|
61
|
+
COPY --from=builder /usr/local/bin /usr/local/bin
|
62
|
+
COPY --from=builder /app/src /app/src
|
63
|
+
|
64
|
+
# Copy configuration files
|
65
|
+
COPY .env.example .env
|
66
|
+
|
67
|
+
# Set environment variables
|
68
|
+
ENV PYTHONPATH=/app/src
|
69
|
+
ENV DAGSTER_HOME=/app/dagster_home
|
70
|
+
|
71
|
+
# Create directories
|
72
|
+
RUN mkdir -p /app/dagster_home && \
|
73
|
+
chown -R khora:khora /app
|
74
|
+
|
75
|
+
# Switch to non-root user
|
76
|
+
USER khora
|
77
|
+
|
78
|
+
# Install the Playwright Chromium browser
|
79
|
+
RUN playwright install chromium
|
80
|
+
|
81
|
+
# Expose Dagster web UI port
|
82
|
+
EXPOSE 3000
|
83
|
+
|
84
|
+
# Default command
|
85
|
+
CMD ["dagster", "dev", "-f", "src/khora/pipelines/definitions.py", "-p", "3000"]
|
khora-0.0.1/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 Khora Contributors
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|