glean-indexing-sdk 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glean_indexing_sdk-0.3.0/.claude/commands/release.md +77 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.cz.toml +1 -1
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.github/workflows/ci.yml +9 -12
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/CHANGELOG.md +6 -0
- glean_indexing_sdk-0.3.0/CLAUDE.md +73 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/CONTRIBUTING.md +8 -8
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/PKG-INFO +2 -1
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/README.md +1 -0
- glean_indexing_sdk-0.3.0/RELEASE.md +84 -0
- glean_indexing_sdk-0.3.0/mise.toml +159 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/pyproject.toml +7 -1
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/streaming/article_connector.py +1 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/__init__.py +18 -18
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/connectors/__init__.py +7 -5
- glean_indexing_sdk-0.3.0/src/glean/indexing/connectors/base_async_streaming_data_client.py +42 -0
- glean_indexing_sdk-0.3.0/src/glean/indexing/connectors/base_async_streaming_datasource_connector.py +233 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/connectors/base_data_client.py +0 -4
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/connectors/base_datasource_connector.py +4 -3
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/connectors/base_people_connector.py +4 -3
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/connectors/base_streaming_data_client.py +0 -4
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/connectors/base_streaming_datasource_connector.py +6 -4
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/testing/mock_glean_client.py +1 -0
- glean_indexing_sdk-0.3.0/tests/unit_tests/test_async_streaming.py +236 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/test_base_datasource_connector.py +3 -2
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/uv.lock +26 -14
- glean_indexing_sdk-0.2.0/RELEASE.md +0 -87
- glean_indexing_sdk-0.2.0/mise.toml +0 -4
- glean_indexing_sdk-0.2.0/taskfile.yml +0 -262
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.env.template +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.github/CODEOWNERS +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.github/workflows/publish.yml +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.gitignore +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.markdown-coderc.json +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.python-version +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.ruff.toml +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/.vscode/settings.json +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/LICENSE +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/env.template +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/non_streaming/complete.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/non_streaming/run_connector.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/non_streaming/wiki_connector.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/non_streaming/wiki_data_client.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/non_streaming/wiki_page_data.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/streaming/article_data.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/streaming/article_data_client.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/streaming/run_connector.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/common/__init__.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/common/batch_processor.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/common/content_formatter.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/common/glean_client.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/common/metrics.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/common/mocks.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/common/property_definition_builder.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/connectors/base_connector.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/models.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/observability/__init__.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/observability/observability.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/py.typed +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/testing/__init__.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/testing/connector_test_harness.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/testing/mock_data_source.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/testing/response_validator.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/__init__.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/integration_tests/__init__.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/__init__.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/common/__init__.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/common/mock_clients.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/common/test_batch_processor.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/common/test_content_formatter.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/common/test_metrics.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/common/test_property_definition_builder.py +1 -1
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/test_base_connector.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/test_base_data_client.py +0 -0
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/test_base_people_connector.py +1 -1
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/test_base_streaming_datasource_connector.py +1 -1
- {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/tests/unit_tests/test_custom_connector_integration.py +1 -1
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Release Process
|
|
2
|
+
|
|
3
|
+
Guide the user through releasing a new version of the Glean Indexing SDK.
|
|
4
|
+
|
|
5
|
+
## Instructions
|
|
6
|
+
|
|
7
|
+
Follow these steps to release a new version:
|
|
8
|
+
|
|
9
|
+
### Step 1: Pre-flight Checks
|
|
10
|
+
|
|
11
|
+
Run the following checks to ensure the codebase is ready for release:
|
|
12
|
+
|
|
13
|
+
1. **Check git status**: Ensure working directory is clean and on `main` branch
|
|
14
|
+
2. **Pull latest**: `git pull origin main`
|
|
15
|
+
3. **Run tests**: `mise run test`
|
|
16
|
+
4. **Run linting**: `mise run lint`
|
|
17
|
+
|
|
18
|
+
If any checks fail, stop and fix the issues before proceeding.
|
|
19
|
+
|
|
20
|
+
### Step 2: Preview the Release
|
|
21
|
+
|
|
22
|
+
Run a dry-run to see what version bump will occur and what the changelog will look like:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
DRY_RUN=true mise run release
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Show the user:
|
|
29
|
+
- The current version (from `pyproject.toml`)
|
|
30
|
+
- The new version that will be created
|
|
31
|
+
- The changelog entries that will be added
|
|
32
|
+
|
|
33
|
+
Ask the user to confirm they want to proceed with the release.
|
|
34
|
+
|
|
35
|
+
### Step 3: Create the Release
|
|
36
|
+
|
|
37
|
+
If the user confirms, run the actual release:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
mise run release
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
This will:
|
|
44
|
+
- Bump the version in `pyproject.toml`
|
|
45
|
+
- Update `CHANGELOG.md`
|
|
46
|
+
- Create a git commit with the version bump
|
|
47
|
+
- Create a git tag (e.g., `v0.2.1`)
|
|
48
|
+
|
|
49
|
+
### Step 4: Push to Remote
|
|
50
|
+
|
|
51
|
+
Push the commit and tag to the remote:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
git push origin main --follow-tags
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Important:** Pushing the tag triggers the GitHub Actions workflow (`.github/workflows/publish.yml`) which automatically:
|
|
58
|
+
- Builds the package
|
|
59
|
+
- Creates a GitHub Release with changelog
|
|
60
|
+
- Publishes to PyPI
|
|
61
|
+
|
|
62
|
+
### Step 5: Verify the Release
|
|
63
|
+
|
|
64
|
+
After pushing, guide the user to verify:
|
|
65
|
+
|
|
66
|
+
1. Check the GitHub Actions workflow is running: `gh run list --limit 3`
|
|
67
|
+
2. Watch the workflow: `gh run watch`
|
|
68
|
+
3. Once complete, verify:
|
|
69
|
+
- GitHub Release exists: `gh release view vX.Y.Z`
|
|
70
|
+
- PyPI package is published: https://pypi.org/project/glean-indexing-sdk/
|
|
71
|
+
|
|
72
|
+
## Summary
|
|
73
|
+
|
|
74
|
+
At the end, provide a summary:
|
|
75
|
+
- New version number
|
|
76
|
+
- Link to the GitHub release: https://github.com/gleanwork/glean-indexing-sdk/releases/tag/vX.Y.Z
|
|
77
|
+
- Link to PyPI: https://pypi.org/project/glean-indexing-sdk/X.Y.Z/
|
|
@@ -22,13 +22,13 @@ jobs:
|
|
|
22
22
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
23
23
|
|
|
24
24
|
- name: Setup development environment
|
|
25
|
-
run:
|
|
25
|
+
run: mise run setup
|
|
26
26
|
|
|
27
27
|
- name: Run linters
|
|
28
|
-
run:
|
|
28
|
+
run: mise run lint
|
|
29
29
|
|
|
30
30
|
- name: Build package
|
|
31
|
-
run:
|
|
31
|
+
run: mise run build
|
|
32
32
|
|
|
33
33
|
test:
|
|
34
34
|
name: Test Python ${{ matrix.python-version }}
|
|
@@ -44,23 +44,20 @@ jobs:
|
|
|
44
44
|
uses: jdx/mise-action@v2
|
|
45
45
|
with:
|
|
46
46
|
cache: true
|
|
47
|
-
mise_toml: |
|
|
48
|
-
[tools]
|
|
49
|
-
python = "${{ matrix.python-version }}"
|
|
50
|
-
task = "latest"
|
|
51
|
-
uv = "latest"
|
|
52
|
-
|
|
53
47
|
env:
|
|
54
48
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
55
49
|
|
|
50
|
+
- name: Set Python version for matrix
|
|
51
|
+
run: mise use python@${{ matrix.python-version }}
|
|
52
|
+
|
|
56
53
|
- name: Setup development environment
|
|
57
|
-
run:
|
|
54
|
+
run: mise run setup
|
|
58
55
|
|
|
59
56
|
- name: Run tests
|
|
60
|
-
run:
|
|
57
|
+
run: mise run test
|
|
61
58
|
|
|
62
59
|
- name: Test package installation
|
|
63
60
|
run: |
|
|
64
|
-
|
|
61
|
+
mise run build
|
|
65
62
|
uv run pip install dist/*.whl
|
|
66
63
|
uv run python -c "import glean.indexing; print('Package installed successfully')"
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
Python SDK for building custom Glean indexing connectors. Provides base classes and utilities to create connectors that fetch data from external systems and upload to Glean's indexing APIs.
|
|
8
|
+
|
|
9
|
+
## Commands
|
|
10
|
+
|
|
11
|
+
Uses [mise](https://mise.jdx.dev/) (`brew install mise`) for toolchain and task management with `uv` for Python dependencies.
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Setup
|
|
15
|
+
mise run setup # Create venv and install all dependencies
|
|
16
|
+
|
|
17
|
+
# Testing
|
|
18
|
+
mise run test # Run all tests
|
|
19
|
+
mise run test:watch # Run tests in watch mode
|
|
20
|
+
mise run test:cov # Run tests with coverage
|
|
21
|
+
|
|
22
|
+
# Linting
|
|
23
|
+
mise run lint # Run all linters (ruff, pyright, readme)
|
|
24
|
+
mise run lint:fix # Auto-fix lint issues and format code
|
|
25
|
+
|
|
26
|
+
# Building
|
|
27
|
+
mise run build # Build the package
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Run a single test:
|
|
31
|
+
```bash
|
|
32
|
+
uv run pytest tests/unit_tests/test_base_connector.py -v
|
|
33
|
+
uv run pytest tests/unit_tests/test_base_connector.py::TestClassName::test_method -v
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Architecture
|
|
37
|
+
|
|
38
|
+
### Core Abstractions
|
|
39
|
+
|
|
40
|
+
**Connector hierarchy** (`src/glean/indexing/connectors/`):
|
|
41
|
+
- `BaseConnector` - Abstract base defining `get_data()`, `transform()`, `index_data()`
|
|
42
|
+
- `BaseDatasourceConnector` - For document/content indexing (fits in memory)
|
|
43
|
+
- `BaseStreamingDatasourceConnector` - For large/paginated datasets (yields data via sync generator)
|
|
44
|
+
- `BaseAsyncStreamingDatasourceConnector` - For large datasets with async APIs (yields data via async generator)
|
|
45
|
+
- `BasePeopleConnector` - For employee/identity indexing
|
|
46
|
+
|
|
47
|
+
**Data clients** (`src/glean/indexing/connectors/`):
|
|
48
|
+
- `BaseDataClient[T]` - Fetches all data at once, returns `Sequence[T]`
|
|
49
|
+
- `BaseStreamingDataClient[T]` - Yields data incrementally via `Generator[T]`
|
|
50
|
+
- `BaseAsyncStreamingDataClient[T]` - Yields data incrementally via `AsyncGenerator[T]`
|
|
51
|
+
|
|
52
|
+
### Pattern: Implementing a Connector
|
|
53
|
+
|
|
54
|
+
1. Define data type as `TypedDict`
|
|
55
|
+
2. Create data client extending `BaseDataClient[YourType]`
|
|
56
|
+
3. Create connector extending `BaseDatasourceConnector[YourType]`
|
|
57
|
+
4. Set `configuration: CustomDatasourceConfig` class attribute
|
|
58
|
+
5. Implement `transform()` to convert source data to `DocumentDefinition`
|
|
59
|
+
|
|
60
|
+
### Key Modules
|
|
61
|
+
|
|
62
|
+
- `models.py` - Type definitions, `IndexingMode`, `DocumentDefinition`, etc.
|
|
63
|
+
- `common/glean_client.py` - API client wrapper (uses env vars `GLEAN_INSTANCE`, `GLEAN_INDEXING_API_TOKEN`)
|
|
64
|
+
- `common/batch_processor.py` - Batches data for upload
|
|
65
|
+
- `observability/` - Logging decorators and metrics tracking
|
|
66
|
+
- `testing/` - `ConnectorTestHarness`, `MockGleanClient` for testing without API calls
|
|
67
|
+
|
|
68
|
+
## Code Style
|
|
69
|
+
|
|
70
|
+
- Line length: 160 characters
|
|
71
|
+
- Docstrings: Google style
|
|
72
|
+
- Type hints required (pyright basic mode)
|
|
73
|
+
- Ruff for linting and formatting
|
|
@@ -7,11 +7,11 @@ Thank you for your interest in contributing to the Glean Connector SDK! This doc
|
|
|
7
7
|
1. Clone the repository
|
|
8
8
|
2. Set up your environment:
|
|
9
9
|
```bash
|
|
10
|
-
# Install
|
|
11
|
-
brew install
|
|
10
|
+
# Install mise if not already installed
|
|
11
|
+
brew install mise
|
|
12
12
|
|
|
13
13
|
# Set up development environment
|
|
14
|
-
|
|
14
|
+
mise run setup
|
|
15
15
|
```
|
|
16
16
|
|
|
17
17
|
## Development Workflow
|
|
@@ -27,8 +27,8 @@ We use the following workflow for development:
|
|
|
27
27
|
|
|
28
28
|
3. Run linting and tests:
|
|
29
29
|
```bash
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
mise run lint:fix
|
|
31
|
+
mise run test:all
|
|
32
32
|
```
|
|
33
33
|
|
|
34
34
|
4. Commit your changes using commitizen:
|
|
@@ -43,7 +43,7 @@ We use the following workflow for development:
|
|
|
43
43
|
We follow standard Python code styles:
|
|
44
44
|
|
|
45
45
|
- Use [Ruff](https://github.com/astral-sh/ruff) for linting and formatting
|
|
46
|
-
- Use [
|
|
46
|
+
- Use [Pyright](https://github.com/microsoft/pyright) for type checking
|
|
47
47
|
- Follow [type hints](https://docs.python.org/3/library/typing.html) in all code
|
|
48
48
|
|
|
49
49
|
## Testing
|
|
@@ -57,10 +57,10 @@ We use [commitizen](https://commitizen-tools.github.io/commitizen/) for versioni
|
|
|
57
57
|
|
|
58
58
|
```bash
|
|
59
59
|
# Perform a dry run
|
|
60
|
-
|
|
60
|
+
DRY_RUN=true mise run release
|
|
61
61
|
|
|
62
62
|
# Create a new release
|
|
63
|
-
|
|
63
|
+
mise run release
|
|
64
64
|
```
|
|
65
65
|
|
|
66
66
|
## Documentation
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: glean-indexing-sdk
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: SDK for building custom Glean indexing integrations
|
|
5
5
|
Project-URL: Source Code, https://github.com/glean-io/glean-indexing-sdk
|
|
6
6
|
Author-email: Steve Calvert <steve.calvert@glean.com>
|
|
@@ -435,6 +435,7 @@ class LargeKnowledgeBaseClient(StreamingConnectorDataClient[ArticleData]):
|
|
|
435
435
|
from typing import List, Sequence
|
|
436
436
|
|
|
437
437
|
from glean.api_client.models.userreferencedefinition import UserReferenceDefinition
|
|
438
|
+
|
|
438
439
|
from glean.indexing.connectors import BaseStreamingDatasourceConnector
|
|
439
440
|
from glean.indexing.models import ContentDefinition, CustomDatasourceConfig, DocumentDefinition
|
|
440
441
|
|
|
@@ -405,6 +405,7 @@ class LargeKnowledgeBaseClient(StreamingConnectorDataClient[ArticleData]):
|
|
|
405
405
|
from typing import List, Sequence
|
|
406
406
|
|
|
407
407
|
from glean.api_client.models.userreferencedefinition import UserReferenceDefinition
|
|
408
|
+
|
|
408
409
|
from glean.indexing.connectors import BaseStreamingDatasourceConnector
|
|
409
410
|
from glean.indexing.models import ContentDefinition, CustomDatasourceConfig, DocumentDefinition
|
|
410
411
|
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Release Process
|
|
2
|
+
|
|
3
|
+
This document describes the release process for the Glean Indexing SDK.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
Run the `/release` command in Claude Code for a guided release process.
|
|
8
|
+
|
|
9
|
+
## Dependencies
|
|
10
|
+
|
|
11
|
+
- [`mise`](https://mise.jdx.dev/) - Tool and task management
|
|
12
|
+
- [`commitizen`](https://github.com/commitizen-tools/commitizen) - Conventional commits and versioning
|
|
13
|
+
- [`uv`](https://github.com/astral-sh/uv) - Python package management
|
|
14
|
+
|
|
15
|
+
## Versioning
|
|
16
|
+
|
|
17
|
+
We follow [Semantic Versioning](https://semver.org/).
|
|
18
|
+
|
|
19
|
+
- **MAJOR** version for incompatible API changes
|
|
20
|
+
- **MINOR** version for new functionality in a backward compatible manner
|
|
21
|
+
- **PATCH** version for backward compatible bug fixes
|
|
22
|
+
|
|
23
|
+
Version bumps are determined automatically by commit message prefixes:
|
|
24
|
+
- `feat:` → MINOR bump
|
|
25
|
+
- `fix:` → PATCH bump
|
|
26
|
+
- `feat!:` or `BREAKING CHANGE:` → MAJOR bump
|
|
27
|
+
|
|
28
|
+
## Process
|
|
29
|
+
|
|
30
|
+
### 1. Ensure everything is ready for release
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
git checkout main
|
|
34
|
+
git pull origin main
|
|
35
|
+
mise run test
|
|
36
|
+
mise run lint
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### 2. Preview the release
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
DRY_RUN=true mise run release
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
This will show you:
|
|
46
|
+
- The version bump (e.g., 0.2.0 → 0.2.1)
|
|
47
|
+
- The changelog entries that will be generated
|
|
48
|
+
|
|
49
|
+
### 3. Run the release
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
mise run release
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
This will:
|
|
56
|
+
- Bump the version in `pyproject.toml`
|
|
57
|
+
- Update `CHANGELOG.md`
|
|
58
|
+
- Create a git commit
|
|
59
|
+
- Create a git tag (e.g., `v0.2.1`)
|
|
60
|
+
|
|
61
|
+
### 4. Push to trigger automated release
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
git push origin main --follow-tags
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**That's it!** Pushing the tag triggers the GitHub Actions workflow which automatically:
|
|
68
|
+
- Builds the package
|
|
69
|
+
- Creates a GitHub Release with changelog
|
|
70
|
+
- Publishes to PyPI
|
|
71
|
+
|
|
72
|
+
### 5. Verify the release
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
# Watch the workflow
|
|
76
|
+
gh run watch
|
|
77
|
+
|
|
78
|
+
# Verify the release was created
|
|
79
|
+
gh release view v0.2.1
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Check:
|
|
83
|
+
- [GitHub Releases](https://github.com/gleanwork/glean-indexing-sdk/releases)
|
|
84
|
+
- [PyPI Package](https://pypi.org/project/glean-indexing-sdk/)
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
[tools]
|
|
2
|
+
python = "3.10"
|
|
3
|
+
uv = "latest"
|
|
4
|
+
node = "22"
|
|
5
|
+
|
|
6
|
+
[tasks]
|
|
7
|
+
|
|
8
|
+
[tasks.setup]
|
|
9
|
+
description = "Set up local environment (install dependencies, etc.)"
|
|
10
|
+
run = [
|
|
11
|
+
"uv venv .venv",
|
|
12
|
+
"uv pip install -e .[dev,test,lint,typing]",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[tasks."test:all"]
|
|
16
|
+
description = "Run all tests and lint fixes"
|
|
17
|
+
run = [
|
|
18
|
+
"mise run test",
|
|
19
|
+
"mise run lint:fix",
|
|
20
|
+
"mise run lint",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[tasks.test]
|
|
24
|
+
description = "Run unit tests"
|
|
25
|
+
run = "uv run pytest -v --tb=auto -rA --durations=10 -p no:logging tests/"
|
|
26
|
+
|
|
27
|
+
[tasks."test:watch"]
|
|
28
|
+
description = "Run tests in watch mode"
|
|
29
|
+
run = "uv run ptw --now . -- -vv --tb=auto -rA --durations=10 -p no:logging tests/"
|
|
30
|
+
|
|
31
|
+
[tasks."test:cov"]
|
|
32
|
+
description = "Run tests with coverage"
|
|
33
|
+
run = "uv run pytest --cov=glean --cov-report=term --cov-report=html -v tests/"
|
|
34
|
+
|
|
35
|
+
[tasks.lint]
|
|
36
|
+
description = "Run all linters"
|
|
37
|
+
run = [
|
|
38
|
+
"mise run lint:ruff",
|
|
39
|
+
"mise run lint:format:check",
|
|
40
|
+
"mise run lint:pyright",
|
|
41
|
+
"mise run lint:readme",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[tasks."lint:diff"]
|
|
45
|
+
description = "Run linters on changed files"
|
|
46
|
+
run = [
|
|
47
|
+
"bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff check $PYTHON_FILES\"",
|
|
48
|
+
"bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff format $PYTHON_FILES --diff\"",
|
|
49
|
+
"bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); if [ -z \\\"$PYTHON_FILES\\\" ]; then uv run pyright; else uv run pyright $PYTHON_FILES; fi\"",
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[tasks."lint:package"]
|
|
53
|
+
description = "Run linters on package files"
|
|
54
|
+
run = [
|
|
55
|
+
"uv run ruff check src/glean/indexing",
|
|
56
|
+
"uv run ruff format src/glean/indexing --diff",
|
|
57
|
+
"uv run pyright src/glean/indexing",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
[tasks."lint:tests"]
|
|
61
|
+
description = "Run linters on test files"
|
|
62
|
+
run = [
|
|
63
|
+
"uv run ruff check tests",
|
|
64
|
+
"uv run ruff format tests --diff",
|
|
65
|
+
"uv run pyright tests",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
[tasks."lint:fix"]
|
|
69
|
+
description = "Run the lint autofixers"
|
|
70
|
+
run = [
|
|
71
|
+
"mise run lint:fix:ruff",
|
|
72
|
+
"mise run format",
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
[tasks."lint:fix:diff"]
|
|
76
|
+
description = "Run lint autofixers on changed files"
|
|
77
|
+
run = [
|
|
78
|
+
"bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff check $PYTHON_FILES --fix\"",
|
|
79
|
+
"mise run format:diff",
|
|
80
|
+
]
|
|
81
|
+
|
|
82
|
+
[tasks."lint:fix:package"]
|
|
83
|
+
description = "Run lint autofixers on package files"
|
|
84
|
+
run = [
|
|
85
|
+
"uv run ruff check src/glean/indexing --fix",
|
|
86
|
+
"mise run format",
|
|
87
|
+
]
|
|
88
|
+
|
|
89
|
+
[tasks."lint:fix:ruff"]
|
|
90
|
+
description = "Run Ruff autofixer"
|
|
91
|
+
run = "uv run ruff check . --fix --exclude .venv/ --exclude **/site-packages/"
|
|
92
|
+
|
|
93
|
+
[tasks."lint:ruff"]
|
|
94
|
+
description = "Run Ruff linter"
|
|
95
|
+
run = "uv run ruff check . --exclude .venv/ --exclude **/site-packages/"
|
|
96
|
+
|
|
97
|
+
[tasks."lint:format:check"]
|
|
98
|
+
description = "Check code formatting"
|
|
99
|
+
run = "uv run ruff format . --diff"
|
|
100
|
+
|
|
101
|
+
[tasks."lint:pyright"]
|
|
102
|
+
description = "Run Pyright type checker"
|
|
103
|
+
run = "uv run pyright"
|
|
104
|
+
|
|
105
|
+
[tasks."lint:readme"]
|
|
106
|
+
description = "Lint the README.md file"
|
|
107
|
+
run = "npx -y markdown-code check"
|
|
108
|
+
|
|
109
|
+
[tasks."lint:readme:fix"]
|
|
110
|
+
description = "Fix the README.md file"
|
|
111
|
+
run = "npx -y markdown-code sync"
|
|
112
|
+
|
|
113
|
+
[tasks.format]
|
|
114
|
+
description = "Run code formatters"
|
|
115
|
+
run = [
|
|
116
|
+
"mise run format:ruff",
|
|
117
|
+
"mise run format:imports",
|
|
118
|
+
]
|
|
119
|
+
|
|
120
|
+
[tasks."format:diff"]
|
|
121
|
+
description = "Run formatters on changed files"
|
|
122
|
+
run = [
|
|
123
|
+
"bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff format $PYTHON_FILES\"",
|
|
124
|
+
"bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff check --select I --fix $PYTHON_FILES\"",
|
|
125
|
+
]
|
|
126
|
+
|
|
127
|
+
[tasks."format:ruff"]
|
|
128
|
+
description = "Run Ruff formatter"
|
|
129
|
+
run = "uv run ruff format ."
|
|
130
|
+
|
|
131
|
+
[tasks."format:imports"]
|
|
132
|
+
description = "Fix imports"
|
|
133
|
+
run = "uv run ruff check --select I --fix ."
|
|
134
|
+
|
|
135
|
+
[tasks."spell:check"]
|
|
136
|
+
description = "Check spelling"
|
|
137
|
+
run = "uv run codespell --toml pyproject.toml --skip=docs/"
|
|
138
|
+
|
|
139
|
+
[tasks."spell:fix"]
|
|
140
|
+
description = "Fix spelling"
|
|
141
|
+
run = "uv run codespell --toml pyproject.toml -w --skip=docs/"
|
|
142
|
+
|
|
143
|
+
[tasks.clean]
|
|
144
|
+
description = "Clean build artifacts"
|
|
145
|
+
run = [
|
|
146
|
+
"rm -rf dist .venv build **/*.egg-info .pytest_cache .coverage htmlcov .ruff_cache",
|
|
147
|
+
"find . -name \"*.pyc\" -delete",
|
|
148
|
+
"find . -name \"__pycache__\" -delete",
|
|
149
|
+
]
|
|
150
|
+
|
|
151
|
+
[tasks.build]
|
|
152
|
+
description = "Build the package"
|
|
153
|
+
run = "uv run python -m build"
|
|
154
|
+
|
|
155
|
+
[tasks.release]
|
|
156
|
+
description = "Bump version and create a new tag (use DRY_RUN=true for preview)"
|
|
157
|
+
run = [
|
|
158
|
+
"bash -lc 'if [ \"$DRY_RUN\" = \"true\" ]; then uv run python -m commitizen bump --dry-run && uv run python -m commitizen changelog --dry-run; else uv run python -m commitizen bump --yes && uv run python -m commitizen changelog; fi'",
|
|
159
|
+
]
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "glean-indexing-sdk"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "SDK for building custom Glean indexing integrations"
|
|
9
9
|
authors = [{ name = "Steve Calvert", email = "steve.calvert@glean.com" }]
|
|
10
10
|
readme = "README.md"
|
|
@@ -129,3 +129,9 @@ filterwarnings = [
|
|
|
129
129
|
|
|
130
130
|
[tool.hatch.build.targets.wheel]
|
|
131
131
|
packages = ["src/glean"]
|
|
132
|
+
|
|
133
|
+
[dependency-groups]
|
|
134
|
+
dev = [
|
|
135
|
+
"pytest>=8.3.5",
|
|
136
|
+
"pytest-asyncio>=0.26.0",
|
|
137
|
+
]
|
{glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/snippets/streaming/article_connector.py
RENAMED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from typing import List, Sequence
|
|
2
2
|
|
|
3
3
|
from glean.api_client.models.userreferencedefinition import UserReferenceDefinition
|
|
4
|
+
|
|
4
5
|
from glean.indexing.connectors import BaseStreamingDatasourceConnector
|
|
5
6
|
from glean.indexing.models import ContentDefinition, CustomDatasourceConfig, DocumentDefinition
|
|
6
7
|
|
|
@@ -1,56 +1,56 @@
|
|
|
1
1
|
"""Glean Indexing SDK.
|
|
2
2
|
|
|
3
|
-
A Python SDK for building custom Glean indexing solutions. This package provides
|
|
3
|
+
A Python SDK for building custom Glean indexing solutions. This package provides
|
|
4
4
|
the base classes and utilities to create custom connectors for Glean's indexing APIs.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
from importlib.metadata import
|
|
7
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
8
|
+
|
|
9
|
+
from glean.indexing import models
|
|
10
|
+
from glean.indexing.common import BatchProcessor, ConnectorMetrics, ContentFormatter, MockGleanClient, api_client
|
|
8
11
|
from glean.indexing.connectors import (
|
|
12
|
+
BaseAsyncStreamingDataClient,
|
|
13
|
+
BaseAsyncStreamingDatasourceConnector,
|
|
9
14
|
BaseConnector,
|
|
15
|
+
BaseDataClient,
|
|
10
16
|
BaseDatasourceConnector,
|
|
11
|
-
BaseStreamingDatasourceConnector,
|
|
12
17
|
BasePeopleConnector,
|
|
13
|
-
|
|
14
|
-
|
|
18
|
+
BaseStreamingDataClient,
|
|
19
|
+
BaseStreamingDatasourceConnector,
|
|
15
20
|
)
|
|
16
|
-
from glean.indexing.common import BatchProcessor, ContentFormatter, ConnectorMetrics, api_client, MockGleanClient
|
|
17
|
-
from glean.indexing.observability.observability import ConnectorObservability
|
|
18
|
-
from glean.indexing.testing import ConnectorTestHarness
|
|
19
21
|
from glean.indexing.models import (
|
|
20
22
|
DatasourceIdentityDefinitions,
|
|
21
23
|
IndexingMode,
|
|
22
|
-
TSourceData,
|
|
23
24
|
TIndexableEntityDefinition,
|
|
25
|
+
TSourceData,
|
|
24
26
|
)
|
|
25
|
-
from glean.indexing import
|
|
27
|
+
from glean.indexing.observability.observability import ConnectorObservability
|
|
28
|
+
from glean.indexing.testing import ConnectorTestHarness
|
|
26
29
|
|
|
27
30
|
__all__ = [
|
|
28
31
|
"BaseConnector",
|
|
32
|
+
"BaseDataClient",
|
|
29
33
|
"BaseDatasourceConnector",
|
|
30
34
|
"BasePeopleConnector",
|
|
35
|
+
"BaseStreamingDataClient",
|
|
31
36
|
"BaseStreamingDatasourceConnector",
|
|
32
|
-
|
|
33
|
-
"
|
|
34
|
-
"StreamingConnectorDataClient",
|
|
35
|
-
|
|
37
|
+
"BaseAsyncStreamingDataClient",
|
|
38
|
+
"BaseAsyncStreamingDatasourceConnector",
|
|
36
39
|
"BatchProcessor",
|
|
37
40
|
"ContentFormatter",
|
|
38
41
|
"ConnectorMetrics",
|
|
39
42
|
"ConnectorObservability",
|
|
40
43
|
"ConnectorTestHarness",
|
|
41
|
-
|
|
42
44
|
"DatasourceIdentityDefinitions",
|
|
43
45
|
"IndexingMode",
|
|
44
46
|
"TSourceData",
|
|
45
47
|
"TIndexableEntityDefinition",
|
|
46
|
-
|
|
47
48
|
"MockGleanClient",
|
|
48
49
|
"api_client",
|
|
49
|
-
|
|
50
50
|
"models",
|
|
51
51
|
]
|
|
52
52
|
|
|
53
53
|
try:
|
|
54
54
|
__version__ = version("glean-indexing-sdk")
|
|
55
55
|
except PackageNotFoundError:
|
|
56
|
-
__version__ = "0.
|
|
56
|
+
__version__ = "0.3.0"
|
{glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.0}/src/glean/indexing/connectors/__init__.py
RENAMED
|
@@ -1,21 +1,23 @@
|
|
|
1
1
|
"""Connector implementations for Glean indexing."""
|
|
2
2
|
|
|
3
3
|
from glean.indexing.connectors.base_connector import BaseConnector
|
|
4
|
-
from glean.indexing.connectors.base_data_client import BaseDataClient
|
|
4
|
+
from glean.indexing.connectors.base_data_client import BaseDataClient
|
|
5
5
|
from glean.indexing.connectors.base_datasource_connector import BaseDatasourceConnector
|
|
6
6
|
from glean.indexing.connectors.base_people_connector import BasePeopleConnector
|
|
7
|
-
from glean.indexing.connectors.base_streaming_data_client import BaseStreamingDataClient
|
|
7
|
+
from glean.indexing.connectors.base_streaming_data_client import BaseStreamingDataClient
|
|
8
8
|
from glean.indexing.connectors.base_streaming_datasource_connector import BaseStreamingDatasourceConnector
|
|
9
|
+
from glean.indexing.connectors.base_async_streaming_data_client import BaseAsyncStreamingDataClient
|
|
10
|
+
from glean.indexing.connectors.base_async_streaming_datasource_connector import BaseAsyncStreamingDatasourceConnector
|
|
9
11
|
from glean.indexing.testing.connector_test_harness import ConnectorTestHarness
|
|
10
12
|
|
|
11
13
|
__all__ = [
|
|
12
14
|
"BaseConnector",
|
|
13
15
|
"BaseDataClient",
|
|
14
|
-
"BaseConnectorDataClient", # Backward compatibility alias
|
|
15
16
|
"BaseDatasourceConnector",
|
|
16
17
|
"BasePeopleConnector",
|
|
17
|
-
"BaseStreamingDataClient",
|
|
18
|
-
"StreamingConnectorDataClient", # Backward compatibility alias
|
|
18
|
+
"BaseStreamingDataClient",
|
|
19
19
|
"BaseStreamingDatasourceConnector",
|
|
20
|
+
"BaseAsyncStreamingDataClient",
|
|
21
|
+
"BaseAsyncStreamingDatasourceConnector",
|
|
20
22
|
"ConnectorTestHarness",
|
|
21
23
|
]
|