glean-indexing-sdk 0.2.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. glean_indexing_sdk-0.3.1/.claude/commands/release.md +77 -0
  2. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.cz.toml +3 -2
  3. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.github/workflows/ci.yml +9 -12
  4. glean_indexing_sdk-0.3.1/CHANGELOG.md +38 -0
  5. glean_indexing_sdk-0.3.1/CLAUDE.md +73 -0
  6. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/CONTRIBUTING.md +8 -8
  7. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/PKG-INFO +2 -1
  8. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/README.md +1 -0
  9. glean_indexing_sdk-0.3.1/RELEASE.md +84 -0
  10. glean_indexing_sdk-0.3.1/mise.toml +181 -0
  11. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/pyproject.toml +7 -1
  12. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/snippets/streaming/article_connector.py +1 -0
  13. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/__init__.py +18 -18
  14. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/connectors/__init__.py +7 -5
  15. glean_indexing_sdk-0.3.1/src/glean/indexing/connectors/base_async_streaming_data_client.py +42 -0
  16. glean_indexing_sdk-0.3.1/src/glean/indexing/connectors/base_async_streaming_datasource_connector.py +233 -0
  17. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/connectors/base_data_client.py +0 -4
  18. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/connectors/base_datasource_connector.py +4 -3
  19. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/connectors/base_people_connector.py +4 -3
  20. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/connectors/base_streaming_data_client.py +0 -4
  21. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/connectors/base_streaming_datasource_connector.py +6 -4
  22. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/testing/mock_glean_client.py +1 -0
  23. glean_indexing_sdk-0.3.1/tests/unit_tests/test_async_streaming.py +236 -0
  24. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/test_base_datasource_connector.py +3 -2
  25. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/uv.lock +26 -14
  26. glean_indexing_sdk-0.2.0/CHANGELOG.md +0 -10
  27. glean_indexing_sdk-0.2.0/RELEASE.md +0 -87
  28. glean_indexing_sdk-0.2.0/mise.toml +0 -4
  29. glean_indexing_sdk-0.2.0/taskfile.yml +0 -262
  30. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.env.template +0 -0
  31. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.github/CODEOWNERS +0 -0
  32. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.github/workflows/publish.yml +0 -0
  33. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.gitignore +0 -0
  34. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.markdown-coderc.json +0 -0
  35. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.python-version +0 -0
  36. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.ruff.toml +0 -0
  37. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/.vscode/settings.json +0 -0
  38. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/LICENSE +0 -0
  39. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/env.template +0 -0
  40. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/snippets/non_streaming/complete.py +0 -0
  41. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/snippets/non_streaming/run_connector.py +0 -0
  42. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/snippets/non_streaming/wiki_connector.py +0 -0
  43. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/snippets/non_streaming/wiki_data_client.py +0 -0
  44. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/snippets/non_streaming/wiki_page_data.py +0 -0
  45. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/snippets/streaming/article_data.py +0 -0
  46. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/snippets/streaming/article_data_client.py +0 -0
  47. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/snippets/streaming/run_connector.py +0 -0
  48. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/common/__init__.py +0 -0
  49. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/common/batch_processor.py +0 -0
  50. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/common/content_formatter.py +0 -0
  51. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/common/glean_client.py +0 -0
  52. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/common/metrics.py +0 -0
  53. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/common/mocks.py +0 -0
  54. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/common/property_definition_builder.py +0 -0
  55. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/connectors/base_connector.py +0 -0
  56. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/models.py +0 -0
  57. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/observability/__init__.py +0 -0
  58. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/observability/observability.py +0 -0
  59. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/py.typed +0 -0
  60. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/testing/__init__.py +0 -0
  61. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/testing/connector_test_harness.py +0 -0
  62. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/testing/mock_data_source.py +0 -0
  63. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/src/glean/indexing/testing/response_validator.py +0 -0
  64. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/__init__.py +0 -0
  65. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/integration_tests/__init__.py +0 -0
  66. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/__init__.py +0 -0
  67. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/common/__init__.py +0 -0
  68. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/common/mock_clients.py +0 -0
  69. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/common/test_batch_processor.py +0 -0
  70. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/common/test_content_formatter.py +0 -0
  71. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/common/test_metrics.py +0 -0
  72. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/common/test_property_definition_builder.py +1 -1
  73. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/test_base_connector.py +0 -0
  74. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/test_base_data_client.py +0 -0
  75. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/test_base_people_connector.py +1 -1
  76. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/test_base_streaming_datasource_connector.py +1 -1
  77. {glean_indexing_sdk-0.2.0 → glean_indexing_sdk-0.3.1}/tests/unit_tests/test_custom_connector_integration.py +1 -1
@@ -0,0 +1,77 @@
1
+ # Release Process
2
+
3
+ Guide the user through releasing a new version of the Glean Indexing SDK.
4
+
5
+ ## Instructions
6
+
7
+ Follow these steps to release a new version:
8
+
9
+ ### Step 1: Pre-flight Checks
10
+
11
+ Run the following checks to ensure the codebase is ready for release:
12
+
13
+ 1. **Check git status**: Ensure working directory is clean and on `main` branch
14
+ 2. **Pull latest**: `git pull origin main`
15
+ 3. **Run tests**: `mise run test`
16
+ 4. **Run linting**: `mise run lint`
17
+
18
+ If any checks fail, stop and fix the issues before proceeding.
19
+
20
+ ### Step 2: Preview the Release
21
+
22
+ Run a dry-run to see what version bump will occur and what the changelog will look like:
23
+
24
+ ```bash
25
+ DRY_RUN=true mise run release
26
+ ```
27
+
28
+ Show the user:
29
+ - The current version (from `pyproject.toml`)
30
+ - The new version that will be created
31
+ - The changelog entries that will be added
32
+
33
+ Ask the user to confirm they want to proceed with the release.
34
+
35
+ ### Step 3: Create the Release
36
+
37
+ If the user confirms, run the actual release:
38
+
39
+ ```bash
40
+ mise run release
41
+ ```
42
+
43
+ This will:
44
+ - Bump the version in `pyproject.toml` and `src/glean/indexing/__init__.py`
45
+ - Update `CHANGELOG.md`
46
+ - Create a git commit with the version bump
47
+ - Create a git tag (e.g., `v0.2.1`)
48
+
49
+ ### Step 4: Push to Remote
50
+
51
+ Push the commit and tag to the remote:
52
+
53
+ ```bash
54
+ git push origin main --follow-tags
55
+ ```
56
+
57
+ **Important:** Pushing the tag triggers the GitHub Actions workflow (`.github/workflows/publish.yml`) which automatically:
58
+ - Builds the package
59
+ - Creates a GitHub Release with changelog
60
+ - Publishes to PyPI
61
+
62
+ ### Step 5: Verify the Release
63
+
64
+ After pushing, guide the user to verify:
65
+
66
+ 1. Check the GitHub Actions workflow is running: `gh run list --limit 3`
67
+ 2. Watch the workflow: `gh run watch`
68
+ 3. Once complete, verify:
69
+ - GitHub Release exists: `gh release view vX.Y.Z`
70
+ - PyPI package is published: https://pypi.org/project/glean-indexing-sdk/
71
+
72
+ ## Summary
73
+
74
+ At the end, provide a summary:
75
+ - New version number
76
+ - Link to the GitHub release: https://github.com/gleanwork/glean-indexing-sdk/releases/tag/vX.Y.Z
77
+ - Link to PyPI: https://pypi.org/project/glean-indexing-sdk/X.Y.Z/
@@ -1,5 +1,6 @@
1
1
  [tool.commitizen]
2
2
  name = "cz_conventional_commits"
3
- version = "0.2.0"
3
+ version = "0.3.1"
4
4
  tag_format = "v$version"
5
- version_files = ["pyproject.toml:version", "src/glean/indexing/__init__.py:__version__"]
5
+ version_files = ["pyproject.toml:version", "src/glean/indexing/__init__.py:__version__"]
6
+ update_changelog_on_bump = true
@@ -22,13 +22,13 @@ jobs:
22
22
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
23
23
 
24
24
  - name: Setup development environment
25
- run: task setup
25
+ run: mise run setup
26
26
 
27
27
  - name: Run linters
28
- run: task lint
28
+ run: mise run lint
29
29
 
30
30
  - name: Build package
31
- run: task build
31
+ run: mise run build
32
32
 
33
33
  test:
34
34
  name: Test Python ${{ matrix.python-version }}
@@ -44,23 +44,20 @@ jobs:
44
44
  uses: jdx/mise-action@v2
45
45
  with:
46
46
  cache: true
47
- mise_toml: |
48
- [tools]
49
- python = "${{ matrix.python-version }}"
50
- task = "latest"
51
- uv = "latest"
52
-
53
47
  env:
54
48
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
55
49
 
50
+ - name: Set Python version for matrix
51
+ run: mise use python@${{ matrix.python-version }}
52
+
56
53
  - name: Setup development environment
57
- run: task setup
54
+ run: mise run setup
58
55
 
59
56
  - name: Run tests
60
- run: task test
57
+ run: mise run test
61
58
 
62
59
  - name: Test package installation
63
60
  run: |
64
- task build
61
+ mise run build
65
62
  uv run pip install dist/*.whl
66
63
  uv run python -c "import glean.indexing; print('Package installed successfully')"
@@ -0,0 +1,38 @@
1
+ ## v0.3.1 (2026-02-04)
2
+
3
+ ### Fix
4
+
5
+ - **release**: include CHANGELOG.md and uv.lock in release commit
6
+
7
+ ## v0.3.0 (2026-02-04)
8
+
9
+ ### Feat
10
+
11
+ - add /release command for guided release process
12
+
13
+ ### Fix
14
+
15
+ - use mise use to set Python version instead of overriding mise_toml
16
+
17
+ ### Refactor
18
+
19
+ - remove legacy class aliases and standardize naming
20
+ - rename async classes to use Base prefix consistently
21
+ - rename async streaming files to follow base_* naming convention
22
+
23
+ ## v0.2.0 (2025-07-24)
24
+
25
+ ### Feat
26
+
27
+ - Adds support for forced restarts of indexing uploads
28
+
29
+ ## v0.1.0 (2025-07-23)
30
+
31
+ ### Feat
32
+
33
+ - Adds property definition builder
34
+
35
+ ### Fix
36
+
37
+ - Fixing format of tags for release
38
+ - Adds additional model for re-export
@@ -0,0 +1,73 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Project Overview
6
+
7
+ Python SDK for building custom Glean indexing connectors. Provides base classes and utilities to create connectors that fetch data from external systems and upload to Glean's indexing APIs.
8
+
9
+ ## Commands
10
+
11
+ Uses [mise](https://mise.jdx.dev/) (`brew install mise`) for toolchain and task management with `uv` for Python dependencies.
12
+
13
+ ```bash
14
+ # Setup
15
+ mise run setup # Create venv and install all dependencies
16
+
17
+ # Testing
18
+ mise run test # Run all tests
19
+ mise run test:watch # Run tests in watch mode
20
+ mise run test:cov # Run tests with coverage
21
+
22
+ # Linting
23
+ mise run lint # Run all linters (ruff, pyright, readme)
24
+ mise run lint:fix # Auto-fix lint issues and format code
25
+
26
+ # Building
27
+ mise run build # Build the package
28
+ ```
29
+
30
+ Run a single test:
31
+ ```bash
32
+ uv run pytest tests/unit_tests/test_base_connector.py -v
33
+ uv run pytest tests/unit_tests/test_base_connector.py::TestClassName::test_method -v
34
+ ```
35
+
36
+ ## Architecture
37
+
38
+ ### Core Abstractions
39
+
40
+ **Connector hierarchy** (`src/glean/indexing/connectors/`):
41
+ - `BaseConnector` - Abstract base defining `get_data()`, `transform()`, `index_data()`
42
+ - `BaseDatasourceConnector` - For document/content indexing (fits in memory)
43
+ - `BaseStreamingDatasourceConnector` - For large/paginated datasets (yields data via sync generator)
44
+ - `BaseAsyncStreamingDatasourceConnector` - For large datasets with async APIs (yields data via async generator)
45
+ - `BasePeopleConnector` - For employee/identity indexing
46
+
47
+ **Data clients** (`src/glean/indexing/connectors/`):
48
+ - `BaseDataClient[T]` - Fetches all data at once, returns `Sequence[T]`
49
+ - `BaseStreamingDataClient[T]` - Yields data incrementally via `Generator[T]`
50
+ - `BaseAsyncStreamingDataClient[T]` - Yields data incrementally via `AsyncGenerator[T]`
51
+
52
+ ### Pattern: Implementing a Connector
53
+
54
+ 1. Define data type as `TypedDict`
55
+ 2. Create data client extending `BaseDataClient[YourType]`
56
+ 3. Create connector extending `BaseDatasourceConnector[YourType]`
57
+ 4. Set `configuration: CustomDatasourceConfig` class attribute
58
+ 5. Implement `transform()` to convert source data to `DocumentDefinition`
59
+
60
+ ### Key Modules
61
+
62
+ - `models.py` - Type definitions, `IndexingMode`, `DocumentDefinition`, etc.
63
+ - `common/glean_client.py` - API client wrapper (uses env vars `GLEAN_INSTANCE`, `GLEAN_INDEXING_API_TOKEN`)
64
+ - `common/batch_processor.py` - Batches data for upload
65
+ - `observability/` - Logging decorators and metrics tracking
66
+ - `testing/` - `ConnectorTestHarness`, `MockGleanClient` for testing without API calls
67
+
68
+ ## Code Style
69
+
70
+ - Line length: 160 characters
71
+ - Docstrings: Google style
72
+ - Type hints required (pyright basic mode)
73
+ - Ruff for linting and formatting
@@ -7,11 +7,11 @@ Thank you for your interest in contributing to the Glean Connector SDK! This doc
7
7
  1. Clone the repository
8
8
  2. Set up your environment:
9
9
  ```bash
10
- # Install go-task if not already installed
11
- brew install go-task
10
+ # Install mise if not already installed
11
+ brew install mise
12
12
 
13
13
  # Set up development environment
14
- task setup
14
+ mise run setup
15
15
  ```
16
16
 
17
17
  ## Development Workflow
@@ -27,8 +27,8 @@ We use the following workflow for development:
27
27
 
28
28
  3. Run linting and tests:
29
29
  ```bash
30
- task lint:fix
31
- task test:all
30
+ mise run lint:fix
31
+ mise run test:all
32
32
  ```
33
33
 
34
34
  4. Commit your changes using commitizen:
@@ -43,7 +43,7 @@ We use the following workflow for development:
43
43
  We follow standard Python code styles:
44
44
 
45
45
  - Use [Ruff](https://github.com/astral-sh/ruff) for linting and formatting
46
- - Use [MyPy](https://mypy.readthedocs.io) for type checking
46
+ - Use [Pyright](https://github.com/microsoft/pyright) for type checking
47
47
  - Follow [type hints](https://docs.python.org/3/library/typing.html) in all code
48
48
 
49
49
  ## Testing
@@ -57,10 +57,10 @@ We use [commitizen](https://commitizen-tools.github.io/commitizen/) for versioni
57
57
 
58
58
  ```bash
59
59
  # Perform a dry run
60
- task release DRY_RUN=true
60
+ DRY_RUN=true mise run release
61
61
 
62
62
  # Create a new release
63
- task release
63
+ mise run release
64
64
  ```
65
65
 
66
66
  ## Documentation
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: glean-indexing-sdk
3
- Version: 0.2.0
3
+ Version: 0.3.1
4
4
  Summary: SDK for building custom Glean indexing integrations
5
5
  Project-URL: Source Code, https://github.com/glean-io/glean-indexing-sdk
6
6
  Author-email: Steve Calvert <steve.calvert@glean.com>
@@ -435,6 +435,7 @@ class LargeKnowledgeBaseClient(StreamingConnectorDataClient[ArticleData]):
435
435
  from typing import List, Sequence
436
436
 
437
437
  from glean.api_client.models.userreferencedefinition import UserReferenceDefinition
438
+
438
439
  from glean.indexing.connectors import BaseStreamingDatasourceConnector
439
440
  from glean.indexing.models import ContentDefinition, CustomDatasourceConfig, DocumentDefinition
440
441
 
@@ -405,6 +405,7 @@ class LargeKnowledgeBaseClient(StreamingConnectorDataClient[ArticleData]):
405
405
  from typing import List, Sequence
406
406
 
407
407
  from glean.api_client.models.userreferencedefinition import UserReferenceDefinition
408
+
408
409
  from glean.indexing.connectors import BaseStreamingDatasourceConnector
409
410
  from glean.indexing.models import ContentDefinition, CustomDatasourceConfig, DocumentDefinition
410
411
 
@@ -0,0 +1,84 @@
1
+ # Release Process
2
+
3
+ This document describes the release process for the Glean Indexing SDK.
4
+
5
+ ## Quick Start
6
+
7
+ Run the `/release` command in Claude Code for a guided release process.
8
+
9
+ ## Dependencies
10
+
11
+ - [`mise`](https://mise.jdx.dev/) - Tool and task management
12
+ - [`commitizen`](https://github.com/commitizen-tools/commitizen) - Conventional commits and versioning
13
+ - [`uv`](https://github.com/astral-sh/uv) - Python package management
14
+
15
+ ## Versioning
16
+
17
+ We follow [Semantic Versioning](https://semver.org/).
18
+
19
+ - **MAJOR** version for incompatible API changes
20
+ - **MINOR** version for new functionality in a backward compatible manner
21
+ - **PATCH** version for backward compatible bug fixes
22
+
23
+ Version bumps are determined automatically by commit message prefixes:
24
+ - `feat:` → MINOR bump
25
+ - `fix:` → PATCH bump
26
+ - `feat!:` or `BREAKING CHANGE:` → MAJOR bump
27
+
28
+ ## Process
29
+
30
+ ### 1. Ensure everything is ready for release
31
+
32
+ ```bash
33
+ git checkout main
34
+ git pull origin main
35
+ mise run test
36
+ mise run lint
37
+ ```
38
+
39
+ ### 2. Preview the release
40
+
41
+ ```bash
42
+ DRY_RUN=true mise run release
43
+ ```
44
+
45
+ This will show you:
46
+ - The version bump (e.g., 0.2.0 → 0.2.1)
47
+ - The changelog entries that will be generated
48
+
49
+ ### 3. Run the release
50
+
51
+ ```bash
52
+ mise run release
53
+ ```
54
+
55
+ This will:
56
+ - Bump the version in `pyproject.toml` and `src/glean/indexing/__init__.py`
57
+ - Update `CHANGELOG.md`
58
+ - Create a git commit
59
+ - Create a git tag (e.g., `v0.2.1`)
60
+
61
+ ### 4. Push to trigger automated release
62
+
63
+ ```bash
64
+ git push origin main --follow-tags
65
+ ```
66
+
67
+ **That's it!** Pushing the tag triggers the GitHub Actions workflow which automatically:
68
+ - Builds the package
69
+ - Creates a GitHub Release with changelog
70
+ - Publishes to PyPI
71
+
72
+ ### 5. Verify the release
73
+
74
+ ```bash
75
+ # Watch the workflow
76
+ gh run watch
77
+
78
+ # Verify the release was created
79
+ gh release view v0.2.1
80
+ ```
81
+
82
+ Check:
83
+ - [GitHub Releases](https://github.com/gleanwork/glean-indexing-sdk/releases)
84
+ - [PyPI Package](https://pypi.org/project/glean-indexing-sdk/)
@@ -0,0 +1,181 @@
1
+ [tools]
2
+ python = "3.10"
3
+ uv = "latest"
4
+ node = "22"
5
+
6
+ [tasks]
7
+
8
+ [tasks.setup]
9
+ description = "Set up local environment (install dependencies, etc.)"
10
+ run = [
11
+ "uv venv .venv",
12
+ "uv pip install -e .[dev,test,lint,typing]",
13
+ ]
14
+
15
+ [tasks."test:all"]
16
+ description = "Run all tests and lint fixes"
17
+ run = [
18
+ "mise run test",
19
+ "mise run lint:fix",
20
+ "mise run lint",
21
+ ]
22
+
23
+ [tasks.test]
24
+ description = "Run unit tests"
25
+ run = "uv run pytest -v --tb=auto -rA --durations=10 -p no:logging tests/"
26
+
27
+ [tasks."test:watch"]
28
+ description = "Run tests in watch mode"
29
+ run = "uv run ptw --now . -- -vv --tb=auto -rA --durations=10 -p no:logging tests/"
30
+
31
+ [tasks."test:cov"]
32
+ description = "Run tests with coverage"
33
+ run = "uv run pytest --cov=glean --cov-report=term --cov-report=html -v tests/"
34
+
35
+ [tasks.lint]
36
+ description = "Run all linters"
37
+ run = [
38
+ "mise run lint:ruff",
39
+ "mise run lint:format:check",
40
+ "mise run lint:pyright",
41
+ "mise run lint:readme",
42
+ ]
43
+
44
+ [tasks."lint:diff"]
45
+ description = "Run linters on changed files"
46
+ run = [
47
+ "bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff check $PYTHON_FILES\"",
48
+ "bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff format $PYTHON_FILES --diff\"",
49
+ "bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); if [ -z \\\"$PYTHON_FILES\\\" ]; then uv run pyright; else uv run pyright $PYTHON_FILES; fi\"",
50
+ ]
51
+
52
+ [tasks."lint:package"]
53
+ description = "Run linters on package files"
54
+ run = [
55
+ "uv run ruff check src/glean/indexing",
56
+ "uv run ruff format src/glean/indexing --diff",
57
+ "uv run pyright src/glean/indexing",
58
+ ]
59
+
60
+ [tasks."lint:tests"]
61
+ description = "Run linters on test files"
62
+ run = [
63
+ "uv run ruff check tests",
64
+ "uv run ruff format tests --diff",
65
+ "uv run pyright tests",
66
+ ]
67
+
68
+ [tasks."lint:fix"]
69
+ description = "Run the lint autofixers"
70
+ run = [
71
+ "mise run lint:fix:ruff",
72
+ "mise run format",
73
+ ]
74
+
75
+ [tasks."lint:fix:diff"]
76
+ description = "Run lint autofixers on changed files"
77
+ run = [
78
+ "bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff check $PYTHON_FILES --fix\"",
79
+ "mise run format:diff",
80
+ ]
81
+
82
+ [tasks."lint:fix:package"]
83
+ description = "Run lint autofixers on package files"
84
+ run = [
85
+ "uv run ruff check src/glean/indexing --fix",
86
+ "mise run format",
87
+ ]
88
+
89
+ [tasks."lint:fix:ruff"]
90
+ description = "Run Ruff autofixer"
91
+ run = "uv run ruff check . --fix --exclude .venv/ --exclude **/site-packages/"
92
+
93
+ [tasks."lint:ruff"]
94
+ description = "Run Ruff linter"
95
+ run = "uv run ruff check . --exclude .venv/ --exclude **/site-packages/"
96
+
97
+ [tasks."lint:format:check"]
98
+ description = "Check code formatting"
99
+ run = "uv run ruff format . --diff"
100
+
101
+ [tasks."lint:pyright"]
102
+ description = "Run Pyright type checker"
103
+ run = "uv run pyright"
104
+
105
+ [tasks."lint:readme"]
106
+ description = "Lint the README.md file"
107
+ run = "npx -y markdown-code check"
108
+
109
+ [tasks."lint:readme:fix"]
110
+ description = "Fix the README.md file"
111
+ run = "npx -y markdown-code sync"
112
+
113
+ [tasks.format]
114
+ description = "Run code formatters"
115
+ run = [
116
+ "mise run format:ruff",
117
+ "mise run format:imports",
118
+ ]
119
+
120
+ [tasks."format:diff"]
121
+ description = "Run formatters on changed files"
122
+ run = [
123
+ "bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff format $PYTHON_FILES\"",
124
+ "bash -lc \"PYTHON_FILES=$(git diff --name-only --diff-filter=d main | grep -E '.*\\.(py|ipynb)$' || true); [ -z \\\"$PYTHON_FILES\\\" ] || uv run ruff check --select I --fix $PYTHON_FILES\"",
125
+ ]
126
+
127
+ [tasks."format:ruff"]
128
+ description = "Run Ruff formatter"
129
+ run = "uv run ruff format ."
130
+
131
+ [tasks."format:imports"]
132
+ description = "Fix imports"
133
+ run = "uv run ruff check --select I --fix ."
134
+
135
+ [tasks."spell:check"]
136
+ description = "Check spelling"
137
+ run = "uv run codespell --toml pyproject.toml --skip=docs/"
138
+
139
+ [tasks."spell:fix"]
140
+ description = "Fix spelling"
141
+ run = "uv run codespell --toml pyproject.toml -w --skip=docs/"
142
+
143
+ [tasks.clean]
144
+ description = "Clean build artifacts"
145
+ run = [
146
+ "rm -rf dist .venv build **/*.egg-info .pytest_cache .coverage htmlcov .ruff_cache",
147
+ "find . -name \"*.pyc\" -delete",
148
+ "find . -name \"__pycache__\" -delete",
149
+ ]
150
+
151
+ [tasks.build]
152
+ description = "Build the package"
153
+ run = "uv run python -m build"
154
+
155
+ [tasks.release]
156
+ description = "Bump version and create a new tag (use DRY_RUN=true for preview)"
157
+ run = [
158
+ """bash -lc '
159
+ if [ "$DRY_RUN" = "true" ]; then
160
+ uv run python -m commitizen bump --dry-run
161
+ uv run python -m commitizen changelog --dry-run
162
+ else
163
+ # Bump version (includes changelog update via update_changelog_on_bump)
164
+ uv run python -m commitizen bump --yes
165
+
166
+ # Get the tag that was just created
167
+ TAG=$(git describe --tags --abbrev=0)
168
+
169
+ # Regenerate uv.lock with new version
170
+ uv lock
171
+
172
+ # Amend the commit to include uv.lock
173
+ git add uv.lock
174
+ git commit --amend --no-edit
175
+
176
+ # Update the tag to point to the amended commit
177
+ git tag -d "$TAG"
178
+ git tag "$TAG"
179
+ fi
180
+ '""",
181
+ ]
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "glean-indexing-sdk"
7
- version = "0.2.0"
7
+ version = "0.3.1"
8
8
  description = "SDK for building custom Glean indexing integrations"
9
9
  authors = [{ name = "Steve Calvert", email = "steve.calvert@glean.com" }]
10
10
  readme = "README.md"
@@ -129,3 +129,9 @@ filterwarnings = [
129
129
 
130
130
  [tool.hatch.build.targets.wheel]
131
131
  packages = ["src/glean"]
132
+
133
+ [dependency-groups]
134
+ dev = [
135
+ "pytest>=8.3.5",
136
+ "pytest-asyncio>=0.26.0",
137
+ ]
@@ -1,6 +1,7 @@
1
1
  from typing import List, Sequence
2
2
 
3
3
  from glean.api_client.models.userreferencedefinition import UserReferenceDefinition
4
+
4
5
  from glean.indexing.connectors import BaseStreamingDatasourceConnector
5
6
  from glean.indexing.models import ContentDefinition, CustomDatasourceConfig, DocumentDefinition
6
7
 
@@ -1,56 +1,56 @@
1
1
  """Glean Indexing SDK.
2
2
 
3
- A Python SDK for building custom Glean indexing solutions. This package provides
3
+ A Python SDK for building custom Glean indexing solutions. This package provides
4
4
  the base classes and utilities to create custom connectors for Glean's indexing APIs.
5
5
  """
6
6
 
7
- from importlib.metadata import version, PackageNotFoundError
7
+ from importlib.metadata import PackageNotFoundError, version
8
+
9
+ from glean.indexing import models
10
+ from glean.indexing.common import BatchProcessor, ConnectorMetrics, ContentFormatter, MockGleanClient, api_client
8
11
  from glean.indexing.connectors import (
12
+ BaseAsyncStreamingDataClient,
13
+ BaseAsyncStreamingDatasourceConnector,
9
14
  BaseConnector,
15
+ BaseDataClient,
10
16
  BaseDatasourceConnector,
11
- BaseStreamingDatasourceConnector,
12
17
  BasePeopleConnector,
13
- BaseConnectorDataClient,
14
- StreamingConnectorDataClient,
18
+ BaseStreamingDataClient,
19
+ BaseStreamingDatasourceConnector,
15
20
  )
16
- from glean.indexing.common import BatchProcessor, ContentFormatter, ConnectorMetrics, api_client, MockGleanClient
17
- from glean.indexing.observability.observability import ConnectorObservability
18
- from glean.indexing.testing import ConnectorTestHarness
19
21
  from glean.indexing.models import (
20
22
  DatasourceIdentityDefinitions,
21
23
  IndexingMode,
22
- TSourceData,
23
24
  TIndexableEntityDefinition,
25
+ TSourceData,
24
26
  )
25
- from glean.indexing import models
27
+ from glean.indexing.observability.observability import ConnectorObservability
28
+ from glean.indexing.testing import ConnectorTestHarness
26
29
 
27
30
  __all__ = [
28
31
  "BaseConnector",
32
+ "BaseDataClient",
29
33
  "BaseDatasourceConnector",
30
34
  "BasePeopleConnector",
35
+ "BaseStreamingDataClient",
31
36
  "BaseStreamingDatasourceConnector",
32
-
33
- "BaseConnectorDataClient",
34
- "StreamingConnectorDataClient",
35
-
37
+ "BaseAsyncStreamingDataClient",
38
+ "BaseAsyncStreamingDatasourceConnector",
36
39
  "BatchProcessor",
37
40
  "ContentFormatter",
38
41
  "ConnectorMetrics",
39
42
  "ConnectorObservability",
40
43
  "ConnectorTestHarness",
41
-
42
44
  "DatasourceIdentityDefinitions",
43
45
  "IndexingMode",
44
46
  "TSourceData",
45
47
  "TIndexableEntityDefinition",
46
-
47
48
  "MockGleanClient",
48
49
  "api_client",
49
-
50
50
  "models",
51
51
  ]
52
52
 
53
53
  try:
54
54
  __version__ = version("glean-indexing-sdk")
55
55
  except PackageNotFoundError:
56
- __version__ = "0.2.0"
56
+ __version__ = "0.3.1"