kodit 0.3.14__tar.gz → 0.3.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- {kodit-0.3.14 → kodit-0.3.16}/CLAUDE.md +3 -4
- {kodit-0.3.14 → kodit-0.3.16}/Makefile +4 -1
- {kodit-0.3.14 → kodit-0.3.16}/PKG-INFO +1 -1
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/api/index.md +1 -1
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/api/openapi.json +1 -1
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/_version.py +16 -3
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/application/services/code_indexing_application_service.py +22 -11
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/config.py +8 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/embedding/embedding_factory.py +5 -7
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +86 -14
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/enrichment/enrichment_factory.py +5 -8
- kodit-0.3.16/src/kodit/infrastructure/enrichment/openai_enrichment_provider.py +159 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/middleware.py +1 -0
- kodit-0.3.14/tests/kodit/application/test_code_indexing_application_service.py → kodit-0.3.16/tests/kodit/application/code_indexing_application_service_test.py +66 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/config_test.py +30 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/embedding/embedding_factory_test.py +1 -1
- kodit-0.3.16/tests/kodit/infrastructure/embedding/embedding_provider/openai_embedding_provider_test.py +367 -0
- kodit-0.3.16/tests/kodit/infrastructure/embedding/test_openai_socket_providers.py +215 -0
- kodit-0.3.14/tests/kodit/infrastructure/enrichment/test_enrichment_factory.py → kodit-0.3.16/tests/kodit/infrastructure/enrichment/enrichment_factory_test.py +47 -76
- kodit-0.3.16/tests/kodit/infrastructure/enrichment/enrichment_provider/openai_enrichment_provider_test.py +405 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/smoke.sh +11 -0
- kodit-0.3.14/src/kodit/infrastructure/enrichment/openai_enrichment_provider.py +0 -95
- kodit-0.3.14/tests/kodit/infrastructure/embedding/embedding_provider/test_openai_embedding_provider.py +0 -262
- kodit-0.3.14/tests/kodit/infrastructure/enrichment/enrichment_provider/test_openai_enrichment_provider.py +0 -349
- kodit-0.3.14/tests/kodit/mcp_test.py +0 -88
- {kodit-0.3.14 → kodit-0.3.16}/.claude/commands/debug.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.claude/commands/new-requirement.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.claude/commands/refactor.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.claude/commands/update-docs.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.claude/settings.json +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.cursor/rules/kodit.mdc +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.cursor/rules/style.mdc +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.dockerignore +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/CODE_OF_CONDUCT.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/CONTRIBUTING.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/dependabot.yml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/workflows/docker.yaml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/workflows/docs.yaml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/workflows/pull_request.yaml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/workflows/pypi-test.yaml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/workflows/pypi.yaml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.github/workflows/test.yaml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.gitignore +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.python-version +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.vscode/launch.json +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/.vscode/settings.json +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/Dockerfile +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/LICENSE +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/README.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/alembic.ini +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/_index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/demos/_index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/demos/go-simple-microservice/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/demos/knock-knock-auth/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/developer/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/getting-started/_index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/getting-started/installation/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/getting-started/integration/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/getting-started/quick-start/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/_index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/api/templates/_content.md.j2 +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/api/templates/_example.md.j2 +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/api/templates/_object_schema.md.j2 +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/api/templates/_security_scheme.md.j2 +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/api/templates/api_doc_template.md.j2 +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/configuration/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/deployment/docker-compose.yaml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/deployment/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/deployment/kubernetes.yaml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/hosted-kodit/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/indexing/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/mcp/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/sync/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/docs/reference/telemetry/index.md +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/pyproject.toml +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/.gitignore +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/app.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/application/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/application/factories/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/application/factories/code_indexing_factory.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/application/services/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/application/services/auto_indexing_service.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/application/services/sync_scheduler.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/cli.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/database.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/entities.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/errors.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/interfaces.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/protocols.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/services/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/services/bm25_service.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/services/embedding_service.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/services/enrichment_service.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/services/index_query_service.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/services/index_service.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/domain/value_objects.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/middleware/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/middleware/auth.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/v1/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/v1/dependencies.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/v1/routers/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/v1/routers/indexes.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/v1/routers/search.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/v1/schemas/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/v1/schemas/context.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/v1/schemas/index.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/api/v1/schemas/search.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/bm25/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/bm25/bm25_factory.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/bm25/local_bm25_repository.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/bm25/vectorchord_bm25_repository.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/cloning/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/cloning/git/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/cloning/git/working_copy.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/cloning/metadata.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/embedding/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/embedding/embedding_providers/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/embedding/embedding_providers/batching.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/embedding/local_vector_search_repository.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/enrichment/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/enrichment/local_enrichment_provider.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/git/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/git/git_utils.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/ignore/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/ignore/ignore_pattern_provider.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/indexing/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/indexing/fusion_service.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/mappers/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/mappers/index_mapper.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/slicing/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/slicing/language_detection_service.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/slicing/slicer.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/sqlalchemy/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/sqlalchemy/embedding_repository.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/sqlalchemy/entities.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/sqlalchemy/index_repository.py +4 -4
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/ui/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/ui/progress.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/infrastructure/ui/spinner.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/log.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/mcp.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/README +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/env.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/script.py.mako +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/versions/4552eb3f23ce_add_summary.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/versions/85155663351e_initial.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/versions/9e53ea8bb3b0_add_authors.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/versions/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/reporting.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/utils/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/utils/dump_openapi.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/src/kodit/utils/path_utils.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/conftest.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/docker-smoke.sh +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/experiments/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/experiments/cline_prompt_tests/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/experiments/cline_prompt_tests/cline_prompt.txt +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/experiments/cline_prompt_tests/cline_prompt_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/app_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/application/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/application/services/__init__.py +0 -0
- /kodit-0.3.14/tests/kodit/application/services/test_auto_indexing_service.py → /kodit-0.3.16/tests/kodit/application/services/auto_indexing_service_test.py +0 -0
- /kodit-0.3.14/tests/kodit/application/services/test_sync_scheduler.py → /kodit-0.3.16/tests/kodit/application/services/sync_scheduler_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/cli_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/domain/__init__.py +0 -0
- /kodit-0.3.14/tests/kodit/domain/bm25_domain_service_test.py → /kodit-0.3.16/tests/kodit/domain/bm25_service_test.py +0 -0
- /kodit-0.3.14/tests/kodit/domain/test_embedding_service.py → /kodit-0.3.16/tests/kodit/domain/embedding_service_test.py +0 -0
- /kodit-0.3.14/tests/kodit/domain/enrichment_domain_service_test.py → /kodit-0.3.16/tests/kodit/domain/enrichment_service_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/domain/entities_test.py +0 -0
- /kodit-0.3.14/tests/kodit/domain/test_language_mapping.py → /kodit-0.3.16/tests/kodit/domain/language_detection_service_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/domain/services/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/domain/services/index_service_test.py +0 -0
- /kodit-0.3.14/tests/kodit/domain/test_multi_search_result.py → /kodit-0.3.16/tests/kodit/domain/value_objects_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/e2e.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/bm25/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/bm25/local_bm25_repository_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/bm25/vectorchord_bm25_repository_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/cloning/git_cloning/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/cloning/git_cloning/working_copy_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/embedding/__init__.py +0 -0
- /kodit-0.3.14/tests/kodit/infrastructure/embedding/test_batching.py → /kodit-0.3.16/tests/kodit/infrastructure/embedding/batching_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/embedding/embedding_provider/__init__.py +0 -0
- /kodit-0.3.14/tests/kodit/infrastructure/embedding/embedding_provider/test_hash_embedding_provider.py → /kodit-0.3.16/tests/kodit/infrastructure/embedding/embedding_provider/hash_embedding_provider_test.py +0 -0
- /kodit-0.3.14/tests/kodit/infrastructure/embedding/embedding_provider/test_local_embedding_provider.py → /kodit-0.3.16/tests/kodit/infrastructure/embedding/embedding_provider/local_embedding_provider_test.py +0 -0
- /kodit-0.3.14/tests/kodit/infrastructure/embedding/test_local_vector_search_repository.py → /kodit-0.3.16/tests/kodit/infrastructure/embedding/local_vector_search_repository_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/embedding/test_embedding_integration.py +0 -0
- /kodit-0.3.14/tests/kodit/infrastructure/embedding/test_vectorchord_vector_search_repository.py → /kodit-0.3.16/tests/kodit/infrastructure/embedding/vectorchord_vector_search_repository_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/enrichment/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/enrichment/enrichment_provider/__init__.py +0 -0
- /kodit-0.3.14/tests/kodit/infrastructure/enrichment/enrichment_provider/test_local_enrichment_provider.py → /kodit-0.3.16/tests/kodit/infrastructure/enrichment/enrichment_provider/local_enrichment_provider_test.py +0 -0
- /kodit-0.3.14/tests/kodit/infrastructure/enrichment/enrichment_provider/test_null_enrichment_provider.py → /kodit-0.3.16/tests/kodit/infrastructure/enrichment/enrichment_provider/null_enrichment_provider_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/mappers/__init__.py +0 -0
- /kodit-0.3.14/tests/kodit/infrastructure/mappers/test_index_mapper.py → /kodit-0.3.16/tests/kodit/infrastructure/mappers/index_mapper_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/c/main.c +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/c/models.c +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/c/models.h +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/c/utils.c +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/c/utils.h +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/cpp/main.cpp +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/cpp/models.cpp +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/cpp/models.hpp +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/cpp/utils.cpp +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/cpp/utils.hpp +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/csharp/Main.cs +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/csharp/Models.cs +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/csharp/Utils.cs +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/css/components.css +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/css/main.css +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/css/utilities.css +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/go/main.go +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/go/models.go +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/go/utils.go +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/html/components.html +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/html/forms.html +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/html/main.html +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/java/Main.java +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/java/Models.java +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/java/Utils.java +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/javascript/main.js +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/javascript/models.js +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/javascript/utils.js +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/python/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/python/main.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/python/models.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/python/utils.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/rust/main.rs +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/rust/models.rs +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/data/rust/utils.rs +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/slicing/slicer_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/snippets/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/snippets/csharp.cs +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/snippets/golang.go +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/snippets/javascript.js +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/snippets/knock_knock_server.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/snippets/python.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/snippets/typescript.tsx +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/sqlalchemy/__init__.py +0 -0
- /kodit-0.3.14/tests/kodit/infrastructure/sqlalchemy/test_embedding_repository.py → /kodit-0.3.16/tests/kodit/infrastructure/sqlalchemy/embedding_repository_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/kodit/log_test.py +0 -0
- /kodit-0.3.14/tests/kodit/mcp_stdio_test.py → /kodit-0.3.16/tests/kodit/mcp_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/performance/__init__.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/performance/similarity.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/utils/__init__.py +0 -0
- /kodit-0.3.14/tests/utils/test_path_utils.py → /kodit-0.3.16/tests/utils/path_utils_test.py +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/tests/vectorchord-smoke.sh +0 -0
- {kodit-0.3.14 → kodit-0.3.16}/uv.lock +0 -0
|
@@ -76,10 +76,9 @@ The codebase follows Domain-Driven Design (DDD) with clean architecture:
|
|
|
76
76
|
|
|
77
77
|
Key environment variables:
|
|
78
78
|
|
|
79
|
-
- `
|
|
80
|
-
- `
|
|
81
|
-
- `
|
|
82
|
-
- `KODIT_LOG_LEVEL` - logging level
|
|
79
|
+
- `DB_URL` - database connection string
|
|
80
|
+
- `LOG_LEVEL` - logging level
|
|
81
|
+
- `DEFAULT_SEARCH_PROVIDER` - deciding whether to use vectorchord or sqlite
|
|
83
82
|
|
|
84
83
|
See `config.py` for full configuration options.
|
|
85
84
|
|
|
@@ -12,7 +12,7 @@ look at the [hosted version](https://kodit.helix.ml/docs).
|
|
|
12
12
|
This is the REST API for the Kodit server. Please refer to the
|
|
13
13
|
[Kodit documentation](https://docs.helix.ml/kodit/) for more information.
|
|
14
14
|
|
|
15
|
-
Current version: 0.3.
|
|
15
|
+
Current version: 0.3.16
|
|
16
16
|
|
|
17
17
|
## Authentication
|
|
18
18
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "kodit API",
|
|
5
5
|
"description": "\nThis is the REST API for the Kodit server. Please refer to the\n[Kodit documentation](https://docs.helix.ml/kodit/) for more information.\n ",
|
|
6
|
-
"version": "0.3.
|
|
6
|
+
"version": "0.3.16"
|
|
7
7
|
},
|
|
8
8
|
"paths": {
|
|
9
9
|
"/healthz": {
|
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
3
|
|
|
4
|
-
__all__ = [
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
5
12
|
|
|
6
13
|
TYPE_CHECKING = False
|
|
7
14
|
if TYPE_CHECKING:
|
|
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
|
|
|
9
16
|
from typing import Union
|
|
10
17
|
|
|
11
18
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
12
20
|
else:
|
|
13
21
|
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
14
23
|
|
|
15
24
|
version: str
|
|
16
25
|
__version__: str
|
|
17
26
|
__version_tuple__: VERSION_TUPLE
|
|
18
27
|
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
19
30
|
|
|
20
|
-
__version__ = version = '0.3.
|
|
21
|
-
__version_tuple__ = version_tuple = (0, 3,
|
|
31
|
+
__version__ = version = '0.3.16'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 16)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
{kodit-0.3.14 → kodit-0.3.16}/src/kodit/application/services/code_indexing_application_service.py
RENAMED
|
@@ -241,27 +241,38 @@ class CodeIndexingApplicationService:
|
|
|
241
241
|
[x.id for x in final_results]
|
|
242
242
|
)
|
|
243
243
|
|
|
244
|
+
# Create a mapping from snippet ID to search result to handle cases where
|
|
245
|
+
# some snippet IDs don't exist (e.g., with vectorchord inconsistencies)
|
|
246
|
+
snippet_map = {
|
|
247
|
+
result.snippet.id: result
|
|
248
|
+
for result in search_results
|
|
249
|
+
if result.snippet.id is not None
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
# Filter final_results to only include IDs that we actually found snippets for
|
|
253
|
+
valid_final_results = [fr for fr in final_results if fr.id in snippet_map]
|
|
254
|
+
|
|
244
255
|
return [
|
|
245
256
|
MultiSearchResult(
|
|
246
|
-
id=
|
|
247
|
-
content=
|
|
257
|
+
id=snippet_map[fr.id].snippet.id or 0,
|
|
258
|
+
content=snippet_map[fr.id].snippet.original_text(),
|
|
248
259
|
original_scores=fr.original_scores,
|
|
249
260
|
# Enhanced fields
|
|
250
|
-
source_uri=str(
|
|
261
|
+
source_uri=str(snippet_map[fr.id].source.working_copy.remote_uri),
|
|
251
262
|
relative_path=str(
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
)
|
|
263
|
+
snippet_map[fr.id]
|
|
264
|
+
.file.as_path()
|
|
265
|
+
.relative_to(snippet_map[fr.id].source.working_copy.cloned_path)
|
|
255
266
|
),
|
|
256
267
|
language=MultiSearchResult.detect_language_from_extension(
|
|
257
|
-
|
|
268
|
+
snippet_map[fr.id].file.extension()
|
|
258
269
|
),
|
|
259
|
-
authors=[author.name for author in
|
|
260
|
-
created_at=
|
|
270
|
+
authors=[author.name for author in snippet_map[fr.id].authors],
|
|
271
|
+
created_at=snippet_map[fr.id].snippet.created_at or datetime.now(UTC),
|
|
261
272
|
# Summary from snippet entity
|
|
262
|
-
summary=
|
|
273
|
+
summary=snippet_map[fr.id].snippet.summary_text(),
|
|
263
274
|
)
|
|
264
|
-
for
|
|
275
|
+
for fr in valid_final_results
|
|
265
276
|
]
|
|
266
277
|
|
|
267
278
|
async def list_snippets(
|
|
@@ -49,6 +49,14 @@ class Endpoint(BaseModel):
|
|
|
49
49
|
model: str | None = None
|
|
50
50
|
api_key: str | None = None
|
|
51
51
|
num_parallel_tasks: int | None = None
|
|
52
|
+
socket_path: str | None = Field(
|
|
53
|
+
default=None,
|
|
54
|
+
description="Unix socket path for local communication (e.g., /tmp/openai.sock)",
|
|
55
|
+
)
|
|
56
|
+
timeout: float | None = Field(
|
|
57
|
+
default=None,
|
|
58
|
+
description="Request timeout in seconds (default: 30.0)",
|
|
59
|
+
)
|
|
52
60
|
|
|
53
61
|
|
|
54
62
|
class Search(BaseModel):
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Factory for creating embedding services with DDD architecture."""
|
|
2
2
|
|
|
3
|
-
from openai import AsyncOpenAI
|
|
4
3
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
5
4
|
|
|
6
5
|
from kodit.config import AppContext, Endpoint
|
|
@@ -48,15 +47,14 @@ def embedding_domain_service_factory(
|
|
|
48
47
|
endpoint = _get_endpoint_configuration(app_context)
|
|
49
48
|
if endpoint and endpoint.type == "openai":
|
|
50
49
|
log_event("kodit.embedding", {"provider": "openai"})
|
|
50
|
+
# Use new httpx-based provider with socket support
|
|
51
51
|
embedding_provider = OpenAIEmbeddingProvider(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
base_url=endpoint.base_url or "https://api.openai.com/v1",
|
|
55
|
-
timeout=10,
|
|
56
|
-
max_retries=2,
|
|
57
|
-
),
|
|
52
|
+
api_key=endpoint.api_key,
|
|
53
|
+
base_url=endpoint.base_url or "https://api.openai.com/v1",
|
|
58
54
|
model_name=endpoint.model or "text-embedding-3-small",
|
|
59
55
|
num_parallel_tasks=endpoint.num_parallel_tasks or OPENAI_NUM_PARALLEL_TASKS,
|
|
56
|
+
socket_path=endpoint.socket_path,
|
|
57
|
+
timeout=endpoint.timeout or 30.0,
|
|
60
58
|
)
|
|
61
59
|
else:
|
|
62
60
|
log_event("kodit.embedding", {"provider": "local"})
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
"""OpenAI embedding provider implementation."""
|
|
1
|
+
"""OpenAI embedding provider implementation using httpx."""
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
from collections.abc import AsyncGenerator
|
|
5
|
+
from typing import Any
|
|
5
6
|
|
|
7
|
+
import httpx
|
|
6
8
|
import structlog
|
|
7
9
|
import tiktoken
|
|
8
|
-
from openai import AsyncOpenAI
|
|
9
10
|
from tiktoken import Encoding
|
|
10
11
|
|
|
11
12
|
from kodit.domain.services.embedding_service import EmbeddingProvider
|
|
@@ -22,29 +23,53 @@ OPENAI_NUM_PARALLEL_TASKS = 10 # Semaphore limit for concurrent OpenAI requests
|
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
class OpenAIEmbeddingProvider(EmbeddingProvider):
|
|
25
|
-
"""OpenAI embedding provider that uses OpenAI's embedding API."""
|
|
26
|
+
"""OpenAI embedding provider that uses OpenAI's embedding API via httpx."""
|
|
26
27
|
|
|
27
|
-
def __init__(
|
|
28
|
+
def __init__( # noqa: PLR0913
|
|
28
29
|
self,
|
|
29
|
-
|
|
30
|
+
api_key: str | None = None,
|
|
31
|
+
base_url: str = "https://api.openai.com",
|
|
30
32
|
model_name: str = "text-embedding-3-small",
|
|
31
33
|
num_parallel_tasks: int = OPENAI_NUM_PARALLEL_TASKS,
|
|
34
|
+
socket_path: str | None = None,
|
|
35
|
+
timeout: float = 30.0,
|
|
32
36
|
) -> None:
|
|
33
37
|
"""Initialize the OpenAI embedding provider.
|
|
34
38
|
|
|
35
39
|
Args:
|
|
36
|
-
|
|
37
|
-
|
|
40
|
+
api_key: The OpenAI API key.
|
|
41
|
+
base_url: The base URL for the OpenAI API.
|
|
42
|
+
model_name: The model name to use for embeddings.
|
|
43
|
+
num_parallel_tasks: Maximum number of concurrent requests.
|
|
44
|
+
socket_path: Optional Unix socket path for local communication.
|
|
45
|
+
timeout: Request timeout in seconds.
|
|
38
46
|
|
|
39
47
|
"""
|
|
40
|
-
self.openai_client = openai_client
|
|
41
48
|
self.model_name = model_name
|
|
42
49
|
self.num_parallel_tasks = num_parallel_tasks
|
|
43
50
|
self.log = structlog.get_logger(__name__)
|
|
51
|
+
self.api_key = api_key
|
|
52
|
+
self.base_url = base_url
|
|
53
|
+
self.socket_path = socket_path
|
|
54
|
+
self.timeout = timeout
|
|
44
55
|
|
|
45
56
|
# Lazily initialised token encoding
|
|
46
57
|
self._encoding: Encoding | None = None
|
|
47
58
|
|
|
59
|
+
# Create httpx client with optional Unix socket support
|
|
60
|
+
if socket_path:
|
|
61
|
+
transport = httpx.AsyncHTTPTransport(uds=socket_path)
|
|
62
|
+
self.http_client = httpx.AsyncClient(
|
|
63
|
+
transport=transport,
|
|
64
|
+
base_url="http://localhost", # Base URL for Unix socket
|
|
65
|
+
timeout=timeout,
|
|
66
|
+
)
|
|
67
|
+
else:
|
|
68
|
+
self.http_client = httpx.AsyncClient(
|
|
69
|
+
base_url=base_url,
|
|
70
|
+
timeout=timeout,
|
|
71
|
+
)
|
|
72
|
+
|
|
48
73
|
# ---------------------------------------------------------------------
|
|
49
74
|
# Helper utilities
|
|
50
75
|
# ---------------------------------------------------------------------
|
|
@@ -52,7 +77,17 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
|
|
|
52
77
|
def _get_encoding(self) -> "Encoding":
|
|
53
78
|
"""Return (and cache) the tiktoken encoding for the chosen model."""
|
|
54
79
|
if self._encoding is None:
|
|
55
|
-
|
|
80
|
+
try:
|
|
81
|
+
self._encoding = tiktoken.encoding_for_model(self.model_name)
|
|
82
|
+
except KeyError:
|
|
83
|
+
# If the model is not supported by tiktoken, use a default encoding
|
|
84
|
+
self.log.info(
|
|
85
|
+
"Model not supported by tiktoken, using default encoding",
|
|
86
|
+
model_name=self.model_name,
|
|
87
|
+
default_encoding="o200k_base",
|
|
88
|
+
)
|
|
89
|
+
self._encoding = tiktoken.get_encoding("o200k_base")
|
|
90
|
+
|
|
56
91
|
return self._encoding
|
|
57
92
|
|
|
58
93
|
def _split_sub_batches(
|
|
@@ -66,6 +101,37 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
|
|
|
66
101
|
batch_size=BATCH_SIZE,
|
|
67
102
|
)
|
|
68
103
|
|
|
104
|
+
async def _call_embeddings_api(
|
|
105
|
+
self, texts: list[str]
|
|
106
|
+
) -> dict[str, Any]:
|
|
107
|
+
"""Call the embeddings API using httpx.
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
texts: The texts to embed.
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
The API response as a dictionary.
|
|
114
|
+
|
|
115
|
+
"""
|
|
116
|
+
headers = {
|
|
117
|
+
"Content-Type": "application/json",
|
|
118
|
+
}
|
|
119
|
+
if self.api_key:
|
|
120
|
+
headers["Authorization"] = f"Bearer {self.api_key}"
|
|
121
|
+
|
|
122
|
+
data = {
|
|
123
|
+
"model": self.model_name,
|
|
124
|
+
"input": texts,
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
response = await self.http_client.post(
|
|
128
|
+
"/v1/embeddings",
|
|
129
|
+
json=data,
|
|
130
|
+
headers=headers,
|
|
131
|
+
)
|
|
132
|
+
response.raise_for_status()
|
|
133
|
+
return response.json()
|
|
134
|
+
|
|
69
135
|
async def embed(
|
|
70
136
|
self, data: list[EmbeddingRequest]
|
|
71
137
|
) -> AsyncGenerator[list[EmbeddingResponse], None]:
|
|
@@ -89,17 +155,17 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
|
|
|
89
155
|
) -> list[EmbeddingResponse]:
|
|
90
156
|
async with sem:
|
|
91
157
|
try:
|
|
92
|
-
response = await self.
|
|
93
|
-
|
|
94
|
-
input=[item.text for item in batch],
|
|
158
|
+
response = await self._call_embeddings_api(
|
|
159
|
+
[item.text for item in batch]
|
|
95
160
|
)
|
|
161
|
+
embeddings_data = response.get("data", [])
|
|
96
162
|
|
|
97
163
|
return [
|
|
98
164
|
EmbeddingResponse(
|
|
99
165
|
snippet_id=item.snippet_id,
|
|
100
|
-
embedding=
|
|
166
|
+
embedding=emb_data.get("embedding", []),
|
|
101
167
|
)
|
|
102
|
-
for item,
|
|
168
|
+
for item, emb_data in zip(batch, embeddings_data, strict=True)
|
|
103
169
|
]
|
|
104
170
|
except Exception as e:
|
|
105
171
|
self.log.exception("Error embedding batch", error=str(e))
|
|
@@ -109,3 +175,9 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
|
|
|
109
175
|
tasks = [_process_batch(batch) for batch in batched_data]
|
|
110
176
|
for task in asyncio.as_completed(tasks):
|
|
111
177
|
yield await task
|
|
178
|
+
|
|
179
|
+
async def close(self) -> None:
    """Close the HTTP client, if this instance has one."""
    # Nothing to release when the client attribute was never set.
    if not hasattr(self, "http_client"):
        return
    await self.http_client.aclose()
|
|
183
|
+
|
|
@@ -45,17 +45,14 @@ def enrichment_domain_service_factory(
|
|
|
45
45
|
enrichment_provider: EnrichmentProvider | None = None
|
|
46
46
|
if endpoint and endpoint.type == "openai":
|
|
47
47
|
log_event("kodit.enrichment", {"provider": "openai"})
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
# Use new httpx-based provider with socket support
|
|
50
49
|
enrichment_provider = OpenAIEnrichmentProvider(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
base_url=endpoint.base_url or "https://api.openai.com/v1",
|
|
54
|
-
timeout=60,
|
|
55
|
-
max_retries=2,
|
|
56
|
-
),
|
|
50
|
+
api_key=endpoint.api_key,
|
|
51
|
+
base_url=endpoint.base_url or "https://api.openai.com/v1",
|
|
57
52
|
model_name=endpoint.model or "gpt-4o-mini",
|
|
58
53
|
num_parallel_tasks=endpoint.num_parallel_tasks or OPENAI_NUM_PARALLEL_TASKS,
|
|
54
|
+
socket_path=endpoint.socket_path,
|
|
55
|
+
timeout=endpoint.timeout or 30.0,
|
|
59
56
|
)
|
|
60
57
|
else:
|
|
61
58
|
log_event("kodit.enrichment", {"provider": "local"})
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""OpenAI enrichment provider implementation using httpx."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from collections.abc import AsyncGenerator
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
import structlog
|
|
9
|
+
|
|
10
|
+
from kodit.domain.services.enrichment_service import EnrichmentProvider
|
|
11
|
+
from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
|
|
12
|
+
|
|
13
|
+
ENRICHMENT_SYSTEM_PROMPT = """
You are a professional software developer. You will be given a snippet of code.
Please provide a concise explanation of the code.
"""

# Default tuned to approximately fit within OpenAI's rate limit of 500 / RPM
OPENAI_NUM_PARALLEL_TASKS = 40


class OpenAIEnrichmentProvider(EnrichmentProvider):
    """OpenAI enrichment provider implementation using httpx."""

    def __init__(  # noqa: PLR0913
        self,
        api_key: str | None = None,
        base_url: str = "https://api.openai.com",
        model_name: str = "gpt-4o-mini",
        num_parallel_tasks: int = OPENAI_NUM_PARALLEL_TASKS,
        socket_path: str | None = None,
        timeout: float = 30.0,
    ) -> None:
        """Initialize the OpenAI enrichment provider.

        Args:
            api_key: The OpenAI API key.
            base_url: The base URL for the OpenAI API, with or without a
                trailing ``/v1``. Not used to build the client when
                ``socket_path`` is given.
            model_name: The model name to use for enrichment.
            num_parallel_tasks: Maximum number of concurrent requests.
            socket_path: Optional Unix socket path for local communication.
            timeout: Request timeout in seconds.

        """
        self.log = structlog.get_logger(__name__)
        self.model_name = model_name
        self.num_parallel_tasks = num_parallel_tasks
        self.api_key = api_key
        self.base_url = base_url
        self.socket_path = socket_path
        self.timeout = timeout

        # Create httpx client with optional Unix socket support
        if socket_path:
            transport = httpx.AsyncHTTPTransport(uds=socket_path)
            self.http_client = httpx.AsyncClient(
                transport=transport,
                base_url="http://localhost",  # Base URL for Unix socket
                timeout=timeout,
            )
        else:
            self.http_client = httpx.AsyncClient(
                base_url=base_url,
                timeout=timeout,
            )

    def _chat_completions_path(self) -> str:
        """Return the request path for chat completions.

        httpx appends the request path to the path of the client's base URL,
        so a base URL that already ends in ``/v1`` must not get a second
        ``/v1`` prefix. In socket mode the client targets
        ``http://localhost`` and the full path is always needed.
        """
        if not self.socket_path and self.base_url.rstrip("/").endswith("/v1"):
            return "/chat/completions"
        return "/v1/chat/completions"

    async def _call_chat_completion(
        self, messages: list[dict[str, str]]
    ) -> dict[str, Any]:
        """Call the chat completion API using httpx.

        Args:
            messages: The messages to send to the API.

        Returns:
            The API response as a dictionary.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx
                status (raised by ``response.raise_for_status()``).

        """
        headers = {"Content-Type": "application/json"}
        # The Authorization header is optional so key-less local servers work.
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        payload = {
            "model": self.model_name,
            "messages": messages,
        }

        response = await self.http_client.post(
            self._chat_completions_path(),
            json=payload,
            headers=headers,
        )
        response.raise_for_status()
        return response.json()

    async def enrich(
        self, requests: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Enrich a list of requests using OpenAI API.

        Requests are processed concurrently, bounded by
        ``num_parallel_tasks``. A request that fails is logged and yields a
        response with empty text instead of aborting the whole run.

        Args:
            requests: List of enrichment requests.

        Yields:
            Enrichment responses as they are processed (completion order,
            not input order).

        """
        if not requests:
            self.log.warning("No requests for enrichment")
            return

        # Process requests in parallel with a semaphore to limit concurrency
        sem = asyncio.Semaphore(self.num_parallel_tasks)

        async def process_request(request: EnrichmentRequest) -> EnrichmentResponse:
            # Nothing to send: answer immediately without taking a slot.
            if not request.text:
                return EnrichmentResponse(
                    snippet_id=request.snippet_id,
                    text="",
                )
            async with sem:
                try:
                    messages = [
                        {
                            "role": "system",
                            "content": ENRICHMENT_SYSTEM_PROMPT,
                        },
                        {"role": "user", "content": request.text},
                    ]
                    response = await self._call_chat_completion(messages)
                    # Tolerate an empty "choices" list and null fields so a
                    # merely empty answer is not reported as an exception.
                    choices = response.get("choices") or [{}]
                    message = choices[0].get("message") or {}
                    content = message.get("content") or ""
                    return EnrichmentResponse(
                        snippet_id=request.snippet_id,
                        text=content,
                    )
                except Exception as e:
                    # Deliberate best effort: one failed snippet must not
                    # stop the remaining enrichments.
                    self.log.exception("Error enriching request", error=str(e))
                    return EnrichmentResponse(
                        snippet_id=request.snippet_id,
                        text="",
                    )

        # Create tasks for all requests
        tasks = [process_request(request) for request in requests]

        # Process all requests and yield results as they complete
        for task in asyncio.as_completed(tasks):
            yield await task

    async def close(self) -> None:
        """Close the HTTP client."""
        if hasattr(self, "http_client"):
            await self.http_client.aclose()
|
|
@@ -53,6 +53,7 @@ async def logging_middleware(request: Request, call_next: Callable) -> Response:
|
|
|
53
53
|
"client_host": client_host,
|
|
54
54
|
"client_port": client_port,
|
|
55
55
|
},
|
|
56
|
+
headers=dict(request.headers),
|
|
56
57
|
network={"client": {"ip": client_host, "port": client_port}},
|
|
57
58
|
duration=process_time,
|
|
58
59
|
)
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Any
|
|
5
|
+
from unittest.mock import patch
|
|
5
6
|
|
|
6
7
|
import pytest
|
|
7
8
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
@@ -13,10 +14,13 @@ from kodit.application.services.code_indexing_application_service import (
|
|
|
13
14
|
CodeIndexingApplicationService,
|
|
14
15
|
)
|
|
15
16
|
from kodit.config import AppContext
|
|
17
|
+
from kodit.domain.entities import SnippetWithContext
|
|
16
18
|
from kodit.domain.interfaces import ProgressCallback
|
|
17
19
|
from kodit.domain.protocols import IndexRepository
|
|
18
20
|
from kodit.domain.services.index_query_service import IndexQueryService
|
|
19
21
|
from kodit.domain.value_objects import (
|
|
22
|
+
FusionRequest,
|
|
23
|
+
FusionResult,
|
|
20
24
|
MultiSearchRequest,
|
|
21
25
|
ProgressEvent,
|
|
22
26
|
)
|
|
@@ -292,3 +296,65 @@ def subtract(a: int, b: int) -> int:
|
|
|
292
296
|
# The above should not raise an error
|
|
293
297
|
final_index = await indexing_query_service.get_index_by_id(index.id)
|
|
294
298
|
assert final_index is not None
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@pytest.mark.asyncio
async def test_vectorchord_bug_zip_mismatch(
    code_indexing_service: CodeIndexingApplicationService,
    tmp_path: Path,
) -> None:
    """Regression test for the vectorchord zip() length mismatch.

    The failure appears when get_snippets_by_ids returns fewer snippets than
    there are IDs in the fused results, for example when some snippet IDs do
    not exist in the database or their related files/sources are missing.
    The test passes as long as the search call does not raise.
    """
    # Index a small Python file so the service has real snippets to work on.
    source_file = tmp_path / "test_code.py"
    source_file.write_text("""
def add(a: int, b: int) -> int:
    return a + b

def subtract(a: int, b: int) -> int:
    return a - b
""")

    index = await code_indexing_service.create_index_from_uri(str(tmp_path))
    await code_indexing_service.run_index(index)
    assert len(index.snippets) > 0, "Should have snippets for initial file"

    async def fake_perform_fusion(
        rankings: list[list[FusionRequest]],  # noqa: ARG001
        k: float = 60.0,  # noqa: ARG001
    ) -> list[FusionResult]:
        # Fusion always reports two hits, so the fused result list is non-empty.
        return [
            FusionResult(id=99999, score=1.0, original_scores=[1.0]),
            FusionResult(id=99998, score=0.8, original_scores=[0.8]),
        ]

    async def fake_get_snippets_by_ids(ids: list[int]) -> list[SnippetWithContext]:  # noqa: ARG001
        # No snippet is found for those IDs, which creates the length mismatch.
        return []

    # patch.object keeps the replacements type-checker friendly (mypy).
    query_service = code_indexing_service.index_query_service
    fusion_patch = patch.object(
        query_service,
        "perform_fusion",
        side_effect=fake_perform_fusion,
    )
    snippets_patch = patch.object(
        query_service,
        "get_snippets_by_ids",
        side_effect=fake_get_snippets_by_ids,
    )
    with fusion_patch, snippets_patch:
        # This search used to fail with ValueError: zip() argument 2 is longer
        search_request = MultiSearchRequest(keywords=["add"], top_k=5)
        await code_indexing_service.search(search_request)
|
|
@@ -114,3 +114,33 @@ class TestAppContextAutoIndexing:
|
|
|
114
114
|
del os.environ["AUTO_INDEXING_SOURCES_0_URI"]
|
|
115
115
|
if "DEFAULT_ENDPOINT_API_KEY" in os.environ:
|
|
116
116
|
del os.environ["DEFAULT_ENDPOINT_API_KEY"]
|
|
117
|
+
|
|
118
|
+
def test_endpoint_timeout_configuration(self) -> None:
    """Test endpoint timeout configuration from env vars."""
    # Imported locally so this test does not depend on the module's imports.
    from unittest.mock import patch

    timeout_env = {
        "DEFAULT_ENDPOINT_TIMEOUT": "45.0",
        "EMBEDDING_ENDPOINT_TIMEOUT": "60.0",
        "ENRICHMENT_ENDPOINT_TIMEOUT": "90.0",
    }

    # patch.dict restores os.environ on exit, including any value a variable
    # had before the test. Deleting the keys by hand would lose such values
    # and leak state into later tests.
    with patch.dict(os.environ, timeout_env):
        # Create new context with env vars
        app_context = AppContext()

        # Verify timeout configurations
        assert app_context.default_endpoint is not None
        assert app_context.default_endpoint.timeout == 45.0

        assert app_context.embedding_endpoint is not None
        assert app_context.embedding_endpoint.timeout == 60.0

        assert app_context.enrichment_endpoint is not None
        assert app_context.enrichment_endpoint.timeout == 90.0
|
{kodit-0.3.14 → kodit-0.3.16}/tests/kodit/infrastructure/embedding/embedding_factory_test.py
RENAMED
|
@@ -82,4 +82,4 @@ async def test_embedding_domain_service_factory(
|
|
|
82
82
|
)
|
|
83
83
|
assert isinstance(service.vector_search_repository, LocalVectorSearchRepository)
|
|
84
84
|
assert isinstance(service.embedding_provider, OpenAIEmbeddingProvider)
|
|
85
|
-
assert service.embedding_provider.
|
|
85
|
+
assert service.embedding_provider.base_url == test_base_url
|