khora 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khora/__init__.py +6 -0
- khora/__main__.py +101 -0
- khora/agents/__init__.py +6 -0
- khora/agents/data_fetcher.py +158 -0
- khora/agents/pipeline_builder.py +217 -0
- khora/pipelines/__init__.py +6 -0
- khora/pipelines/data_pipeline.py +131 -0
- khora/pipelines/definitions.py +14 -0
- khora/tools/__init__.py +7 -0
- khora/tools/api_tool.py +81 -0
- khora/tools/google_docs_tool.py +169 -0
- khora/tools/web_scraper_tool.py +197 -0
- khora/utils/__init__.py +6 -0
- khora/utils/config.py +54 -0
- khora/utils/data_models.py +57 -0
- khora-0.0.1.dist-info/METADATA +309 -0
- khora-0.0.1.dist-info/RECORD +19 -0
- khora-0.0.1.dist-info/WHEEL +4 -0
- khora-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,309 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: khora
|
3
|
+
Version: 0.0.1
|
4
|
+
Summary: Ad-hoc Dagster pipelines for data fetching using AI/LLM prompts and agentic AI
|
5
|
+
Project-URL: Homepage, https://github.com/yourusername/khora
|
6
|
+
Project-URL: Repository, https://github.com/yourusername/khora
|
7
|
+
Project-URL: Issues, https://github.com/yourusername/khora/issues
|
8
|
+
Author: Khora Team
|
9
|
+
License: MIT
|
10
|
+
License-File: LICENSE
|
11
|
+
Keywords: ai,dagster,data-fetching,langchain,langgraph,pipelines
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
13
|
+
Classifier: Intended Audience :: Developers
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
18
|
+
Requires-Python: ==3.12.*
|
19
|
+
Requires-Dist: dagster-webserver>=1.7.0
|
20
|
+
Requires-Dist: dagster>=1.7.0
|
21
|
+
Requires-Dist: google-api-python-client>=2.120.0
|
22
|
+
Requires-Dist: google-auth-httplib2>=0.2.0
|
23
|
+
Requires-Dist: google-auth-oauthlib>=1.2.0
|
24
|
+
Requires-Dist: httpx>=0.27.0
|
25
|
+
Requires-Dist: langchain-community>=0.2.0
|
26
|
+
Requires-Dist: langchain-openai>=0.1.0
|
27
|
+
Requires-Dist: langchain>=0.2.0
|
28
|
+
Requires-Dist: langgraph>=0.1.0
|
29
|
+
Requires-Dist: pandas>=2.2.0
|
30
|
+
Requires-Dist: playwright>=1.45.0
|
31
|
+
Requires-Dist: pydantic>=2.7.0
|
32
|
+
Requires-Dist: python-dotenv>=1.0.0
|
33
|
+
Provides-Extra: dev
|
34
|
+
Requires-Dist: black>=24.4.0; extra == 'dev'
|
35
|
+
Requires-Dist: build>=1.2.0; extra == 'dev'
|
36
|
+
Requires-Dist: isort>=5.13.0; extra == 'dev'
|
37
|
+
Requires-Dist: mypy>=1.10.0; extra == 'dev'
|
38
|
+
Requires-Dist: pre-commit>=3.7.0; extra == 'dev'
|
39
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
40
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
|
41
|
+
Requires-Dist: pytest>=8.2.0; extra == 'dev'
|
42
|
+
Requires-Dist: ruff>=0.5.0; extra == 'dev'
|
43
|
+
Requires-Dist: twine>=5.1.0; extra == 'dev'
|
44
|
+
Requires-Dist: types-python-dateutil>=2.9.0; extra == 'dev'
|
45
|
+
Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
|
46
|
+
Requires-Dist: types-requests>=2.32.0; extra == 'dev'
|
47
|
+
Description-Content-Type: text/markdown
|
48
|
+
|
49
|
+
# Khora
|
50
|
+
|
51
|
+
Ad-hoc Dagster pipelines for data fetching using AI/LLM prompts and agentic AI.
|
52
|
+
|
53
|
+
## Overview
|
54
|
+
|
55
|
+
Khora is a Python package that enables the creation of dynamic data pipelines using Dagster, powered by AI agents built with LangGraph and LangChain. It can fetch data from various sources, including:
|
56
|
+
|
57
|
+
- APIs (REST endpoints with full HTTP method support)
|
58
|
+
- Websites (advanced web scraping using Playwright: handles JavaScript, takes screenshots, and executes custom scripts)
|
59
|
+
- Google Docs/Sheets (with service account authentication)
|
60
|
+
|
61
|
+
## Features
|
62
|
+
|
63
|
+
- 🤖 AI-powered data fetching using natural language prompts
|
64
|
+
- 🔄 Dynamic pipeline generation based on descriptions
|
65
|
+
- 🛠️ Support for multiple data sources:
|
66
|
+
- APIs (REST endpoints)
|
67
|
+
- Web scraping with Playwright (handles JavaScript-rendered content)
|
68
|
+
- Google Docs and Sheets
|
69
|
+
- 🎭 Advanced web scraping capabilities:
|
70
|
+
- JavaScript execution
|
71
|
+
- Screenshot capture
|
72
|
+
- Custom selectors
|
73
|
+
- Wait conditions
|
74
|
+
- 📊 Integration with Dagster for orchestration
|
75
|
+
- 🐳 Docker support for easy deployment
|
76
|
+
- ✅ Comprehensive test coverage
|
77
|
+
|
78
|
+
## Installation
|
79
|
+
|
80
|
+
### Using uv (recommended)
|
81
|
+
|
82
|
+
```bash
|
83
|
+
uv pip install khora
|
84
|
+
```
|
85
|
+
|
86
|
+
### Using pip
|
87
|
+
|
88
|
+
```bash
|
89
|
+
pip install khora
|
90
|
+
```
|
91
|
+
|
92
|
+
### Development Installation
|
93
|
+
|
94
|
+
```bash
|
95
|
+
git clone https://github.com/yourusername/khora.git
|
96
|
+
cd khora
|
97
|
+
uv pip install -e ".[dev]"
|
98
|
+
```
|
99
|
+
|
100
|
+
## Configuration
|
101
|
+
|
102
|
+
1. Copy the environment template:
|
103
|
+
```bash
|
104
|
+
cp .env.example .env
|
105
|
+
```
|
106
|
+
|
107
|
+
2. Edit `.env` and add your credentials:
|
108
|
+
- `OPENAI_API_KEY`: Your OpenAI API key
|
109
|
+
- `GOOGLE_CREDENTIALS_PATH`: Path to Google service account credentials (for Google Docs/Sheets)
|
110
|
+
|
111
|
+
## Usage
|
112
|
+
|
113
|
+
### Basic Example
|
114
|
+
|
115
|
+
```python
|
116
|
+
from khora.agents import DataFetcherAgent, PipelineBuilderAgent
|
117
|
+
from khora.utils.data_models import DataRequest, DataSourceType
|
118
|
+
|
119
|
+
# Initialize agents
|
120
|
+
fetcher = DataFetcherAgent(openai_api_key="your-key")
|
121
|
+
builder = PipelineBuilderAgent(openai_api_key="your-key")
|
122
|
+
|
123
|
+
# Create a data request
|
124
|
+
request = DataRequest(
|
125
|
+
source_type=DataSourceType.API,
|
126
|
+
prompt="Fetch current weather data for San Francisco",
|
127
|
+
source_config={
|
128
|
+
"url": "https://api.weather.com/v1/current"
|
129
|
+
}
|
130
|
+
)
|
131
|
+
|
132
|
+
# Fetch data (fetch_data is awaited, so run this inside an async function, e.g. via asyncio.run)
|
133
|
+
response = await fetcher.fetch_data(request)
|
134
|
+
print(response.data)
|
135
|
+
```
|
136
|
+
|
137
|
+
### Creating Dynamic Pipelines
|
138
|
+
|
139
|
+
```python
|
140
|
+
# Describe your pipeline in natural language
|
141
|
+
description = """
|
142
|
+
Create a pipeline that:
|
143
|
+
1. Fetches cryptocurrency prices from CoinGecko API
|
144
|
+
2. Scrapes latest crypto news from CoinDesk
|
145
|
+
3. Reads analysis from a Google Sheet
|
146
|
+
"""
|
147
|
+
|
148
|
+
# Generate pipeline configuration
|
149
|
+
config = builder.analyze_pipeline_request(description)
|
150
|
+
|
151
|
+
# Build the pipeline from the generated configuration
|
152
|
+
pipeline = builder.build_pipeline(config)
|
153
|
+
```
|
154
|
+
|
155
|
+
### Running Dagster UI
|
156
|
+
|
157
|
+
```bash
|
158
|
+
dagster dev -f src/khora/pipelines/definitions.py
|
159
|
+
```
|
160
|
+
|
161
|
+
Then navigate to http://localhost:3000 to see the Dagster UI.
|
162
|
+
|
163
|
+
## Docker Usage
|
164
|
+
|
165
|
+
### Build the image
|
166
|
+
|
167
|
+
```bash
|
168
|
+
docker build -t khora:latest .
|
169
|
+
```
|
170
|
+
|
171
|
+
### Run the container
|
172
|
+
|
173
|
+
```bash
|
174
|
+
docker run -p 3000:3000 \
|
175
|
+
-e OPENAI_API_KEY=your-key \
|
176
|
+
-v $(pwd)/.env:/app/.env \
|
177
|
+
khora:latest
|
178
|
+
```
|
179
|
+
|
180
|
+
## Testing
|
181
|
+
|
182
|
+
Run the test suite:
|
183
|
+
|
184
|
+
```bash
|
185
|
+
pytest tests/
|
186
|
+
```
|
187
|
+
|
188
|
+
With coverage:
|
189
|
+
|
190
|
+
```bash
|
191
|
+
pytest tests/ --cov=khora --cov-report=html
|
192
|
+
```
|
193
|
+
|
194
|
+
## Requirements
|
195
|
+
|
196
|
+
- Python 3.12 (required)
|
197
|
+
- Playwright browsers (automatically installed during setup)
|
198
|
+
|
199
|
+
## CI/CD
|
200
|
+
|
201
|
+
The project uses GitHub Actions for CI/CD with two main workflows:
|
202
|
+
|
203
|
+
### Main CI Workflow (`ci.yml`)
|
204
|
+
1. Runs tests on Python 3.12
|
205
|
+
2. Checks code formatting with Black and Ruff
|
206
|
+
3. Performs type checking with mypy
|
207
|
+
4. Builds and tests the Docker image
|
208
|
+
5. Uploads coverage reports to Codecov
|
209
|
+
|
210
|
+
### Publish Workflow (`publish.yml`)
|
211
|
+
**Automatically publishes to PyPI** when version tags are pushed:
|
212
|
+
- Triggered by pushing tags matching the `v*` pattern (e.g., `v0.0.2`)
|
213
|
+
- Runs full test suite and quality checks
|
214
|
+
- Builds and publishes package to PyPI
|
215
|
+
- Uses `PYPI_API_TOKEN` secret for authentication
|
216
|
+
|
217
|
+
## Project Structure
|
218
|
+
|
219
|
+
```
|
220
|
+
khora/
|
221
|
+
├── src/khora/
|
222
|
+
│ ├── agents/ # AI agents for data fetching and pipeline building
|
223
|
+
│ ├── pipelines/ # Dagster pipeline definitions
|
224
|
+
│ ├── tools/ # Tools for different data sources
|
225
|
+
│ └── utils/ # Utilities and data models
|
226
|
+
├── tests/ # Test suite
|
227
|
+
├── .github/workflows/ # CI/CD configuration
|
228
|
+
├── Dockerfile # Container definition
|
229
|
+
└── pyproject.toml # Project configuration
|
230
|
+
```
|
231
|
+
|
232
|
+
## Contributing
|
233
|
+
|
234
|
+
1. Fork the repository
|
235
|
+
2. Create a feature branch: `git checkout -b feature-name`
|
236
|
+
3. Make your changes and add tests
|
237
|
+
4. Run tests and linting: `pytest && black . && ruff check .`
|
238
|
+
5. Commit your changes: `git commit -m "Add feature"`
|
239
|
+
6. Push to your fork: `git push origin feature-name`
|
240
|
+
7. Create a pull request
|
241
|
+
|
242
|
+
## License
|
243
|
+
|
244
|
+
MIT License - see LICENSE file for details.
|
245
|
+
|
246
|
+
## Support
|
247
|
+
|
248
|
+
For issues and questions:
|
249
|
+
- Open an issue on GitHub
|
250
|
+
- Check the documentation
|
251
|
+
- Review existing discussions
|
252
|
+
|
253
|
+
## Roadmap
|
254
|
+
|
255
|
+
- [ ] Add support for more data sources (databases, S3, etc.)
|
256
|
+
- [ ] Implement data transformation capabilities
|
257
|
+
- [ ] Add scheduling and monitoring features
|
258
|
+
- [ ] Create a web UI for pipeline management
|
259
|
+
- [ ] Support for more LLM providers
|
260
|
+
|
261
|
+
## Releasing
|
262
|
+
|
263
|
+
### Quick Release (Recommended)
|
264
|
+
|
265
|
+
Use the automated release script:
|
266
|
+
|
267
|
+
```bash
|
268
|
+
# Create and push a patch release (0.0.1 -> 0.0.2)
|
269
|
+
python scripts/create_release.py patch --push
|
270
|
+
|
271
|
+
# Create a minor release (0.0.1 -> 0.1.0)
|
272
|
+
python scripts/create_release.py minor
|
273
|
+
|
274
|
+
# Create a major release (0.0.1 -> 1.0.0)
|
275
|
+
python scripts/create_release.py major
|
276
|
+
|
277
|
+
# Preview what would happen
|
278
|
+
python scripts/create_release.py patch --dry-run
|
279
|
+
```
|
280
|
+
|
281
|
+
### Step-by-Step Release
|
282
|
+
|
283
|
+
1. **Bump version**:
|
284
|
+
```bash
|
285
|
+
python scripts/bump_version.py patch
|
286
|
+
```
|
287
|
+
|
288
|
+
2. **Create git tag and push**:
|
289
|
+
```bash
|
290
|
+
git add .
|
291
|
+
git commit -m "Bump version to 0.0.2"
|
292
|
+
git tag v0.0.2
|
293
|
+
git push origin main --tags
|
294
|
+
```
|
295
|
+
|
296
|
+
3. **Automatic publishing**: The publish workflow will automatically:
|
297
|
+
- Run all tests and quality checks
|
298
|
+
- Build the package
|
299
|
+
- Publish to PyPI
|
300
|
+
|
301
|
+
### Setup PyPI Token
|
302
|
+
|
303
|
+
To enable publishing, add your PyPI API token as a GitHub secret:
|
304
|
+
1. Create an API token on [PyPI](https://pypi.org/manage/account/token/)
|
305
|
+
2. Add it as `PYPI_API_TOKEN` in your repository secrets
|
306
|
+
|
307
|
+
## Version
|
308
|
+
|
309
|
+
Current version: 0.0.1
|
@@ -0,0 +1,19 @@
|
|
1
|
+
khora/__init__.py,sha256=57Qyzpc9KKmx2da2WHvNxkNpmTghYvkSQKeHOZSVJ7M,156
|
2
|
+
khora/__main__.py,sha256=9gS87YH1OpoQSi5qKmhleEg2OfRJupU7NDYegYZ4VCk,3391
|
3
|
+
khora/agents/__init__.py,sha256=Ao0tZ5Jev1xjrOyeTdvGjc98KIddNvtT1GQy0shfQQA,201
|
4
|
+
khora/agents/data_fetcher.py,sha256=i2astoIjJWsPF97YS2yWT37EgGqLOBKX0DjUJRajfKk,5267
|
5
|
+
khora/agents/pipeline_builder.py,sha256=vh7E49stVmnZSwitx5bdbPl6NbZaNF3qPx5HgBR7nuY,7045
|
6
|
+
khora/pipelines/__init__.py,sha256=eySn11UiivczP2-qy1CHoT0Cnhow3gCA3rdhIvJy0SA,177
|
7
|
+
khora/pipelines/data_pipeline.py,sha256=Ku1bkMTc_leTmp2ASEIw57EAEjoQcqkjZdmBqO7KIhs,4185
|
8
|
+
khora/pipelines/definitions.py,sha256=KrXKxngT32o5PhiwU28pkmE-het_VPkFaUzhJSu7-1Q,375
|
9
|
+
khora/tools/__init__.py,sha256=FKtXUJq2q0WRo97UGcijZ4e8EhvINmR542BkQ2gGvHA,230
|
10
|
+
khora/tools/api_tool.py,sha256=zrXN8FmxMu3zb47-VmCK8ZO4YkDdVq7Uf_A6bk7xU4U,2584
|
11
|
+
khora/tools/google_docs_tool.py,sha256=3x81fT62ahjgr1ElzOKyrGzFiFI-KOfGi7Z_OEyFcf0,6204
|
12
|
+
khora/tools/web_scraper_tool.py,sha256=YHN5fTuoXml8h9rH1QIixS4qOz5KoKS9K67Z1ufCzz0,7757
|
13
|
+
khora/utils/__init__.py,sha256=hv1gS2PmHpXDH7GR6ES2mIrG6OQFLn39-tMe8cPQps8,179
|
14
|
+
khora/utils/config.py,sha256=w82viT8sOkIQQ3jPR7a83RNyTJ2X4kO9TucPh-5u9Sw,1882
|
15
|
+
khora/utils/data_models.py,sha256=AZEgNGfZuF0vcy3G3Ia8w4C5l7xvtptT5Ok5lg18vOo,1702
|
16
|
+
khora-0.0.1.dist-info/METADATA,sha256=ZVkfAOwoGev0Ctfr7GOBq6FdaTYIRWTbwpGbfXAAY1Y,8227
|
17
|
+
khora-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
khora-0.0.1.dist-info/licenses/LICENSE,sha256=CDPNN8KV5XyIgCQD_iVkETOAQLCsdFc7zObGg1x06tA,1075
|
19
|
+
khora-0.0.1.dist-info/RECORD,,
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 Khora Contributors
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|