docs2vecs 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. docs2vecs-0.0.1/.dockerignore +1 -0
  2. docs2vecs-0.0.1/.github/workflows/ci.yml +33 -0
  3. docs2vecs-0.0.1/.github/workflows/release-oci-image.yml +165 -0
  4. docs2vecs-0.0.1/.github/workflows/release-pypi.yml +142 -0
  5. docs2vecs-0.0.1/.gitignore +76 -0
  6. docs2vecs-0.0.1/.python-version +1 -0
  7. docs2vecs-0.0.1/CONTRIBUTING.md +90 -0
  8. docs2vecs-0.0.1/Dockerfile +29 -0
  9. docs2vecs-0.0.1/LICENSE +9 -0
  10. docs2vecs-0.0.1/PKG-INFO +232 -0
  11. docs2vecs-0.0.1/README.md +199 -0
  12. docs2vecs-0.0.1/docs/readme/indexer-skills.md +228 -0
  13. docs2vecs-0.0.1/docs/readme/sample-config-file.yml +42 -0
  14. docs2vecs-0.0.1/docs/readme/vectorize.gif +0 -0
  15. docs2vecs-0.0.1/etc/ca-bundle.crt +103 -0
  16. docs2vecs-0.0.1/etc/fetchDefaultModels.py +44 -0
  17. docs2vecs-0.0.1/etc/get_certs.sh +2 -0
  18. docs2vecs-0.0.1/pyproject.toml +57 -0
  19. docs2vecs-0.0.1/src/docs2vecs/__init__.py +9 -0
  20. docs2vecs-0.0.1/src/docs2vecs/__main__.py +4 -0
  21. docs2vecs-0.0.1/src/docs2vecs/_version.py +21 -0
  22. docs2vecs-0.0.1/src/docs2vecs/app.py +109 -0
  23. docs2vecs-0.0.1/src/docs2vecs/cli.py +191 -0
  24. docs2vecs-0.0.1/src/docs2vecs/core.py +285 -0
  25. docs2vecs-0.0.1/src/docs2vecs/readers/__init__.py +0 -0
  26. docs2vecs-0.0.1/src/docs2vecs/readers/yaml.py +83 -0
  27. docs2vecs-0.0.1/src/docs2vecs/static/app.js +122 -0
  28. docs2vecs-0.0.1/src/docs2vecs/static/index.html +39 -0
  29. docs2vecs-0.0.1/src/docs2vecs/static/styles.css +155 -0
  30. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/__init__.py +0 -0
  31. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/config/__init__.py +3 -0
  32. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/config/config.py +63 -0
  33. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/config/config_schema.yaml +204 -0
  34. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/db/__init__.py +3 -0
  35. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/db/mongodb.py +29 -0
  36. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/document/__init__.py +4 -0
  37. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/document/chunk.py +43 -0
  38. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/document/document.py +15 -0
  39. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/indexer.py +41 -0
  40. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/__init__.py +14 -0
  41. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/ada002_embedding_skill.py +34 -0
  42. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/azure_blob_store_uploader_skill.py +58 -0
  43. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/azure_vector_store_skill.py +108 -0
  44. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/chromadb_vector_store_skill.py +38 -0
  45. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/default_file_reader.py +118 -0
  46. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/document_intelligence_skill.py +35 -0
  47. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/factory.py +117 -0
  48. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/file_scanner_skill.py +62 -0
  49. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/jira_loader_skill.py +66 -0
  50. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/llama_fastembed_embedding_skill.py +71 -0
  51. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/local_document_parser.py +29 -0
  52. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/logger.py +44 -0
  53. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/recursive_character_splitter_skill.py +52 -0
  54. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/scrollwordexporter_skill.py +132 -0
  55. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/semantic_splitter_skill.py +63 -0
  56. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/skill.py +37 -0
  57. docs2vecs-0.0.1/src/docs2vecs/subcommands/indexer/skills/tracker.py +21 -0
  58. docs2vecs-0.0.1/src/docs2vecs/subcommands/integrated_vec/config/config_schema.yaml +54 -0
  59. docs2vecs-0.0.1/src/docs2vecs/subcommands/integrated_vec/integrated_vec.py +166 -0
  60. docs2vecs-0.0.1/src/logs/indexer_skills.log +103 -0
  61. docs2vecs-0.0.1/tests/test_cli.py +6 -0
  62. docs2vecs-0.0.1/tests/test_config.py +96 -0
  63. docs2vecs-0.0.1/tests/test_data/test_config.yaml +60 -0
  64. docs2vecs-0.0.1/tests/test_data/test_config_integrated_vec.yaml +17 -0
  65. docs2vecs-0.0.1/tox.ini +81 -0
  66. docs2vecs-0.0.1/uv.lock +3800 -0
@@ -0,0 +1 @@
1
+ .venv
@@ -0,0 +1,33 @@
1
+ name: Build and test
2
+ on: [push, pull_request, workflow_dispatch]
3
+ jobs:
4
+ test:
5
+ name: ${{ matrix.name }}
6
+ runs-on: ${{ matrix.os }}
7
+ timeout-minutes: 30
8
+ strategy:
9
+ fail-fast: false
10
+ matrix:
11
+ include:
12
+ - name: 'test'
13
+ python: '3.11'
14
+ os: 'ubuntu-latest'
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ with:
18
+ fetch-depth: 0
19
+
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v5
22
+ with:
23
+ version: "0.6.3"
24
+
25
+ - name: Setup Python
26
+ run: uv python install ${{ matrix.python }}
27
+
28
+ - name: Install the project
29
+ run: uv sync --all-extras --dev
30
+
31
+ - name: Unit tests
32
+ run: uv run pytest tests
33
+
@@ -0,0 +1,165 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - v*
7
+ branches:
8
+ - main
9
+ pull_request:
10
+ branches:
11
+ - main
12
+ # workflow_dispatch:
13
+ # inputs:
14
+ # tag:
15
+ # description: 'Tag to create'
16
+ # required: true
17
+ # default: 'v0.0.0'
18
+ # version:
19
+ # description: 'Version to create'
20
+ # required: true
21
+ # default: '0.0.0'
22
+ permissions:
23
+ pull-requests: read
24
+ contents: read
25
+
26
+ env:
27
+ OCI_REGISTRY: ghcr.io
28
+
29
+ jobs:
30
+ generate-release-version:
31
+ runs-on: ubuntu-latest
32
+ outputs:
33
+ generated_version: ${{ steps.output-generated-version.outputs.generated_version }}
34
+ image_tag: ${{ steps.generate_tag.outputs.image_tag }}
35
+ image_tag_latest: ${{ steps.output-generated-version.outputs.image_tag_latest }}
36
+ tag_prefix: ${{ steps.generate_tag.outputs.tag_prefix }}
37
+ permissions:
38
+ contents: write
39
+ id-token: write
40
+ steps:
41
+ -
42
+ name: Checkout
43
+ uses: actions/checkout@v4
44
+ with:
45
+ fetch-depth: 0
46
+ -
47
+ name: Generate next version
48
+ uses: paulhatch/semantic-version@v5.4.0
49
+ id: semantic-version
50
+ with:
51
+ tag_prefix: 'v'
52
+ major_pattern: "/((BREAKING CHANGE|BREAKING|breaking|MAJOR|major):?|\\S*!).*/"
53
+ minor_pattern: '/(FEATURE|feature|MINOR|minor|FEAT|feat):?[^!].*/'
54
+ version_format: '${major}.${minor}.${patch}'
55
+ -
56
+ name: Output generated version
57
+ id: output-generated-version
58
+ run: |
59
+ echo "generated_version=${{ steps.semantic-version.outputs.version }}" >> $GITHUB_OUTPUT
60
+ echo "image_tag_latest=${{ env.OCI_REGISTRY }}/$(echo "$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]'):latest" >> $GITHUB_OUTPUT # lower-cased repo name is computed inline: shell variables set in other steps (repo_name) are not visible here
61
+
62
+ -
63
+ name: Version tag ${{ steps.semantic-version.outputs.version }} on main branch
64
+ if: ${{ github.ref == 'refs/heads/main' }}
65
+ shell: bash
66
+ run: |
67
+ echo "generated_tag=${{ steps.semantic-version.outputs.version }}" >> $GITHUB_ENV
68
+ echo "GITHUB_REPO=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV}
69
+ echo "tag_prefix=v" >> $GITHUB_OUTPUT
70
+
71
+ -
72
+ name: Version tag ${{ steps.semantic-version.outputs.version }} on pull request
73
+ if: ${{ github.event_name == 'pull_request' }}
74
+ shell: bash
75
+ run: |
76
+ echo "generated_tag=pr-${{github.event.pull_request.number}}-${{ steps.semantic-version.outputs.version }}-$(git rev-parse --short HEAD)" >> $GITHUB_ENV
77
+ echo "GITHUB_REPO=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV}
78
+ echo "tag_prefix=" >> $GITHUB_OUTPUT
79
+
80
+ -
81
+ name: Compute image tag based on version tag ${{ env.generated_tag }}
82
+ id: generate_tag
83
+ shell: bash
84
+ run: |
85
+ # image tag needs to be all lower case
86
+ repo_name=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]')
87
+ echo "image_tag=${{ env.OCI_REGISTRY }}/${repo_name}:${{ env.generated_tag }}" >> $GITHUB_OUTPUT
88
+
89
+ docker-image-release:
90
+ needs: [generate-release-version]
91
+ timeout-minutes: 30
92
+ permissions:
93
+ contents: read
94
+ packages: write
95
+ id-token: write
96
+ strategy:
97
+ fail-fast: false
98
+ matrix:
99
+ os: [ubuntu-24.04, ubuntu-24.04-arm]
100
+ runs-on: ${{ matrix.os }}
101
+ steps:
102
+ -
103
+ uses: actions/checkout@v4
104
+ -
105
+ uses: docker/setup-docker-action@v4.1.0
106
+ -
107
+ name: Login to GitHub Container Registry
108
+ uses: docker/login-action@v3
109
+ with:
110
+ registry: ${{ env.OCI_REGISTRY }}
111
+ username: ${{ github.actor }}
112
+ password: ${{ secrets.GITHUB_TOKEN }}
113
+ -
114
+ name: Build image for ${{ matrix.os }}
115
+ run: |
116
+ docker build --tag ${{ needs.generate-release-version.outputs.image_tag }}-${{ runner.arch }} .
117
+ docker images
118
+ docker run -i ${{ needs.generate-release-version.outputs.image_tag }}-${{ runner.arch }} -h
119
+ docker image inspect ${{ needs.generate-release-version.outputs.image_tag }}-${{ runner.arch }}
120
+ -
121
+ name: Push image for ${{ matrix.os }}
122
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') # tag pushes are delivered as 'push' events; there is no 'tag' event name
123
+ run: |
124
+ docker push ${{ needs.generate-release-version.outputs.image_tag }}-${{ runner.arch }}
125
+
126
+ create-image-release:
127
+ if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') # tag pushes are delivered as 'push' events; there is no 'tag' event name
128
+ permissions:
129
+ packages: write
130
+ id-token: write
131
+ contents: write
132
+ issues: write
133
+ pull-requests: write
134
+
135
+ needs: [generate-release-version, docker-image-release]
136
+ runs-on: ubuntu-24.04
137
+ steps:
138
+ -
139
+ uses: actions/checkout@v4
140
+ -
141
+ uses: docker/setup-docker-action@v4.1.0
142
+ -
143
+ name: Login to GitHub Container Registry
144
+ uses: docker/login-action@v3
145
+ with:
146
+ registry: ${{ env.OCI_REGISTRY }}
147
+ username: ${{ github.actor }}
148
+ password: ${{ secrets.GITHUB_TOKEN }}
149
+ -
150
+ name: Build manifest and push it registry
151
+ run: |
152
+ docker manifest create \
153
+ ${{ needs.generate-release-version.outputs.image_tag }} \
154
+ --amend ${{ needs.generate-release-version.outputs.image_tag }}-X64 \
155
+ --amend ${{ needs.generate-release-version.outputs.image_tag }}-ARM64
156
+ docker manifest push ${{ needs.generate-release-version.outputs.image_tag }}
157
+ docker manifest inspect ${{ needs.generate-release-version.outputs.image_tag }}
158
+
159
+ docker manifest create \
160
+ ${{ needs.generate-release-version.outputs.image_tag_latest }} \
161
+ --amend ${{ needs.generate-release-version.outputs.image_tag_latest }}-X64 \
162
+ --amend ${{ needs.generate-release-version.outputs.image_tag_latest }}-ARM64
163
+ docker manifest push ${{ needs.generate-release-version.outputs.image_tag_latest }}
164
+ docker manifest inspect ${{ needs.generate-release-version.outputs.image_tag_latest }}
165
+
@@ -0,0 +1,142 @@
1
+ name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - v*
7
+ branches:
8
+ - main
9
+ pull_request:
10
+ branches:
11
+ - main
12
+
13
+ jobs:
14
+ build:
15
+ name: Build distribution 📦
16
+ runs-on: ubuntu-latest
17
+ outputs:
18
+ package_name: ${{ steps.build_artifacts.outputs.package_name }}
19
+ package_version: ${{ steps.build_artifacts.outputs.package_version }}
20
+
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+ with:
24
+ persist-credentials: false
25
+ - name: Set up Python
26
+ uses: actions/setup-python@v5
27
+ with:
28
+ python-version: "3.x"
29
+ - name: Install pypa/build
30
+ run: >-
31
+ python3 -m pip install build hatchling hatch-vcs --user
32
+ - name: Build a binary wheel and a source tarball
33
+ id: build_artifacts
34
+ run: |
35
+ python3 -m build
36
+ package_name=$(hatchling metadata name)
37
+ package_version=$(hatchling version)
38
+ echo "package_name=$package_name" >> $GITHUB_OUTPUT
39
+ echo "package_version=$package_version" >> $GITHUB_OUTPUT
40
+ - name: Store the distribution packages
41
+ uses: actions/upload-artifact@v4
42
+ with:
43
+ name: python-package-distributions
44
+ path: dist/
45
+
46
+ publish-to-pypi:
47
+ name: >-
48
+ Publish Python 🐍 distribution 📦 to PyPI
49
+ if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
50
+ needs:
51
+ - build
52
+ runs-on: ubuntu-latest
53
+ environment:
54
+ name: pypi
55
+ url: https://pypi.org/p/${{ needs.build.outputs.package_name }}
56
+ permissions:
57
+ id-token: write # IMPORTANT: mandatory for trusted publishing
58
+
59
+ steps:
60
+ - name: Download all the dists
61
+ uses: actions/download-artifact@v4
62
+ with:
63
+ name: python-package-distributions
64
+ path: dist/
65
+ - name: Publish distribution 📦 to PyPI
66
+ uses: pypa/gh-action-pypi-publish@release/v1
67
+
68
+ github-release:
69
+ name: >-
70
+ Sign the Python 🐍 distribution 📦 with Sigstore
71
+ and upload them to GitHub Release
72
+ needs:
73
+ - publish-to-pypi
74
+ runs-on: ubuntu-latest
75
+
76
+ permissions:
77
+ contents: write # IMPORTANT: mandatory for making GitHub Releases
78
+ id-token: write # IMPORTANT: mandatory for sigstore
79
+
80
+ steps:
81
+ - name: Download all the dists
82
+ uses: actions/download-artifact@v4
83
+ with:
84
+ name: python-package-distributions
85
+ path: dist/
86
+ - name: Sign the dists with Sigstore
87
+ uses: sigstore/gh-action-sigstore-python@v3.0.0
88
+ with:
89
+ inputs: >-
90
+ ./dist/*.tar.gz
91
+ ./dist/*.whl
92
+ - name: Delete the stub of Release
93
+ env:
94
+ GITHUB_TOKEN: ${{ github.token }}
95
+ run: >-
96
+ gh release delete
97
+ "$GITHUB_REF_NAME"
98
+ --repo "$GITHUB_REPOSITORY"
99
+ - name: Create GitHub Release
100
+ env:
101
+ GITHUB_TOKEN: ${{ github.token }}
102
+ run: |
103
+ gh release create "$GITHUB_REF_NAME" \
104
+ --title "Release $GITHUB_REF_NAME" \
105
+ --generate-notes \
106
+ --repo "$GITHUB_REPOSITORY"
107
+ - name: Upload artifact signatures to GitHub Release
108
+ env:
109
+ GITHUB_TOKEN: ${{ github.token }}
110
+ # Upload to GitHub Release using the `gh` CLI.
111
+ # `dist/` contains the built packages, and the
112
+ # sigstore-produced signatures and certificates.
113
+ run: >-
114
+ gh release upload
115
+ "$GITHUB_REF_NAME" dist/**
116
+ --repo "$GITHUB_REPOSITORY"
117
+
118
+ publish-to-testpypi:
119
+ name: Publish Python 🐍 distribution 📦 to TestPyPI
120
+ # we only publish to TestPyPI on tag pushes or merge to main
121
+ if: startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main'
122
+ needs:
123
+ - build
124
+ runs-on: ubuntu-latest
125
+
126
+ environment:
127
+ name: testpypi
128
+ url: https://test.pypi.org/p/${{ needs.build.outputs.package_name }}
129
+
130
+ permissions:
131
+ id-token: write # IMPORTANT: mandatory for trusted publishing
132
+
133
+ steps:
134
+ - name: Download all the dists
135
+ uses: actions/download-artifact@v4
136
+ with:
137
+ name: python-package-distributions
138
+ path: dist/
139
+ - name: Publish distribution 📦 to TestPyPI
140
+ uses: pypa/gh-action-pypi-publish@release/v1
141
+ with:
142
+ repository-url: https://test.pypi.org/legacy/
@@ -0,0 +1,76 @@
1
+ *.py[cod]
2
+ __pycache__
3
+
4
+ # Temp files
5
+ .*.sw[po]
6
+ *~
7
+ *.bak
8
+ .DS_Store
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Build and package files
14
+ *.egg
15
+ *.egg-info
16
+ .bootstrap
17
+ .build
18
+ .cache
19
+ .eggs
20
+ .env
21
+ .installed.cfg
22
+ .ve
23
+ bin
24
+ build
25
+ develop-eggs
26
+ dist
27
+ eggs
28
+ lib
29
+ lib64
30
+ parts
31
+ pip-wheel-metadata/
32
+ pyvenv*/
33
+ sdist
34
+ var
35
+ venv*/
36
+ .venv*/
37
+ wheelhouse
38
+ src/docs2vecs/_version.py
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+
43
+ # Unit test / coverage reports
44
+ .benchmarks
45
+ .coverage
46
+ .coverage.*
47
+ .pytest
48
+ .pytest_cache/
49
+ .tox
50
+ coverage.xml
51
+ htmlcov
52
+ nosetests.xml
53
+
54
+ # Translations
55
+ *.mo
56
+
57
+ # Buildout
58
+ .mr.developer.cfg
59
+
60
+ # IDE project files
61
+ *.iml
62
+ *.komodoproject
63
+ .idea
64
+ .project
65
+ .pydevproject
66
+ .vscode
67
+
68
+ # Complexity
69
+ output/*.html
70
+ output/*/index.html
71
+
72
+ # Sphinx
73
+ docs/_build
74
+
75
+ # Mypy Cache
76
+ .mypy_cache/
@@ -0,0 +1 @@
1
+ 3.11
@@ -0,0 +1,90 @@
1
+ ### Community guidelines
2
+ * Be respectful to others
3
+ * Be appreciative and welcoming
4
+ * Don't be judgmental
5
+ * Be patient and supportive to newcomers
6
+ * Value each contribution, even if it's not perfect - we can work as a team to benefit even from a failed attempt as we can learn from it!
7
+ * Look after one another - we are a community of like-minded people who care about others, not only about ticking the boxes
8
+ * A challenge is not a bad thing, as it leads to expanding the horizons, but being too competitive leads to unhealthy situations - be reasonable here
9
+
10
+ ### Code conventions
11
+ Please follow the standard python rules if possible:
12
+ * existing conventions
13
+ * [PEP8](https://www.python.org/dev/peps/pep-0008/)
14
+ * [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html)
15
+
16
+ There is always room for opportunistic refactoring, but be careful and ensure the cosmetic changes have no adverse impact on performance or readability.
17
+
18
+ ### Testing conventions
19
+ The project comes with a suite of unit tests covering most of the aspects. Make sure you pass all the existing tests locally before submitting the PR. Furthermore, any new feature, flow or the amendment of the existing one should be reflected in newly added unit tests or the update of the existing tests depending on the context.
20
+
21
+ Please follow [pytest good practices](https://docs.pytest.org/en/stable/goodpractices.html). You can also draw some inspirations from [this article](https://realpython.com/pytest-python-testing/).
22
+
23
+ Make sure you test the syntax before submitting the PR. Run a local build if you can and verify if all the new extra resources are also checked in.
24
+
25
+ ### Branching conventions
26
+ We are working with a single master branch and one release branch to keep it simple.
27
+
28
+ ### Commit-message conventions
29
+ * Prefix each commit with the Jira/GitHub Issue ticket number if possible i.e. [ABC-123] New package nnn added to allow running bbb
30
+ * Provide a high level summary of the changes. Try to be concise.
31
+ * Make the commit messages meaningful. Don't skip them. They may be helpful during the code review and act as a passive documentation going forwards.
32
+
33
+ ### Steps for creating good pull requests
34
+ * State your intent very clearly
35
+ * If there is a need to provide a thorough explanation or refer to external sources, please do it - it will help in the review
36
+ * Use the Jira/GitHub Issue ticket as a prefix in the PR title to ensure we get nice cross-references
37
+ * If you are unsure about certain aspects, don't be scared of asking on the available forums ahead of creating the PR.
38
+ * If you are aware of some drawback of the changes introduced, be transparent about it; the reviewers will weigh the pros and cons and your contribution may still be accepted
39
+ * To stick to a reasonable number of commits in your PR, you may want to squash some of them using [the git history rewriting technique](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History)
40
+ * You can use the PR as a medium for the conversation between yourself and the project maintainers. You can prefix the PR with a meaningful tag, e.g. [IDEA], [SUGGESTION], [REMARK] etc. In such a case your PR may never be integrated if what you are proposing is not in line with the general direction the project is going in. However, it would still be a valuable resource to track the discussion that took place and it may save time for somebody who is heading in a similar direction.
41
+
42
+ ### Expected timelines(SLAs) for the code review and the integration
43
+ * The PRs should be reviewed within 1 week at the latest
44
+ * The integration happens immediately after the PR is approved and merged into the target branch
45
+
46
+ ### How to submit feature requests
47
+ * You may want to discuss the feature on the teams channel or other forums available for the project
48
+ * Use the Jira board or GitHub Issues associated with this project
49
+ * Link the PR(when the contribution is planned) with the Jira ticket/GitHub issue if possible - it may give us more context and will make the case for the change stronger
50
+ * Please be thorough with the description
51
+ * Highlight a reasonable timescale you wish the feature to be integrated within - it is helpful when prioritizing
52
+
53
+ ### How to submit bug reports
54
+ * Check the documentation carefully, and ask on the available forums, to find out whether the behaviour you are experiencing is expected or is a bug
55
+ * Use Issues board associated to the project
56
+ * Link the PR(when the contribution is planned) with the bug report request if possible - it may give us more context
57
+ * Please be thorough with the description
58
+ * Highlight a reasonable timescale you wish the fix to be integrated within - it is helpful when prioritizing
59
+
60
+ ### How to submit security issue reports
61
+ * Engage with project maintainers. Do not publicly disclose anything before the patch is delivered.
62
+
63
+ ### How to write documentation
64
+ * README.md and GETTINGSTARTED.md are where we describe the overview, the usage and the development practices
65
+ * Use [markdown syntax](https://www.markdownguide.org/basic-syntax/) which is widely supported in the GitHub, BitBucket WebUI as well as in many IDEs
66
+ * Use plain English and check your spelling prior to committing the change
67
+ * Remember that good documentation is essential, so take time to do it properly
68
+
69
+ ### Dependencies
70
+ All the development dependencies are incorporated into [pyproject.toml](./pyproject.toml).
71
+
72
+ ### Build process schedule
73
+ The deliverable(python wheel) is built as soon as the PR is created, modified or merged into release(main) branch.
74
+
75
+ ### Sprint schedule
76
+ There is no specific scrum setup here. All the changes are worked on in a best effort mode.
77
+
78
+ ### Road map
79
+ There is no roadmap yet. One will be created if there is a need for it.
80
+
81
+ ### When the repositories will be closed to contributions
82
+ At this stage the repositories never get frozen.
83
+
84
+ ### Time reporting
85
+ There is no budget behind this project. Potential contributors need to negotiate it with their line or project managers.
86
+
87
+ ### Helpful links, information, and documentation
88
+ * [Markdown syntax](https://www.markdownguide.org/basic-syntax/)
89
+ * [PEP8](https://www.python.org/dev/peps/pep-0008/)
90
+ * [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html)
@@ -0,0 +1,29 @@
1
+ ARG SOURCE_IMAGE_NAME=ubi9/ubi
2
+ FROM registry.access.redhat.com/${SOURCE_IMAGE_NAME}:latest
3
+
4
+ ARG TARGETPLATFORM
5
+ ARG TARGETARCH
6
+
7
+ COPY ./etc/ca-bundle.crt /etc/pki/ca-trust/source/anchors/
8
+ COPY ./ /tmp/build
9
+
10
+ RUN echo "sslverify=0" >> /etc/dnf/dnf.conf \
11
+ && dnf install -y openssl ca-certificates gcc git \
12
+ && if [ "$TARGETPLATFORM" == "linux/arm64" ]; then\
13
+ # Creating symlink to /usr/bin/aarch64-linux-gnu-gcc as it is assumed by cython(perhaps worth logging an issue with them)
14
+ ln -sf /usr/bin/gcc /usr/bin/aarch64-linux-gnu-gcc;\
15
+ fi \
16
+ # Remove the insecure setting after install:
17
+ && sed -i '/sslverify=0/d' /etc/dnf/dnf.conf \
18
+ && update-ca-trust extract
19
+
20
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
21
+ && source $HOME/.local/bin/env \
22
+ && uv python install 3.11 \
23
+ && uv venv \
24
+ && cd /tmp/build \
25
+ && uv --native-tls pip install --no-cache . \
26
+ && uv --native-tls run --script /tmp/build/etc/fetchDefaultModels.py \
27
+ && rm -rf /tmp/build
28
+
29
+ ENTRYPOINT ["/root/.local/bin/uv", "--native-tls", "run", "docs2vecs"]
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c), Amadeus
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.