sinapsis-huggingface 0.1.0__tar.gz → 0.2.0__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (43)
  1. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/PKG-INFO +51 -13
  2. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/README.md +50 -12
  3. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface.egg-info/PKG-INFO +51 -13
  4. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface.egg-info/SOURCES.txt +6 -1
  5. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/__init__.py +3 -0
  6. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py +11 -0
  7. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py +1 -0
  8. sinapsis_huggingface-0.2.0/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_base.py +97 -0
  9. sinapsis_huggingface-0.2.0/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_detection.py +124 -0
  10. sinapsis_huggingface-0.2.0/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_inference.py +260 -0
  11. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py +1 -0
  12. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py +1 -0
  13. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py +2 -1
  14. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py +1 -0
  15. sinapsis_huggingface-0.2.0/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/__init__.py +0 -0
  16. sinapsis_huggingface-0.2.0/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/helpers.py +70 -0
  17. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/pyproject.toml +1 -1
  18. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/LICENSE +0 -0
  19. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface.egg-info/dependency_links.txt +0 -0
  20. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface.egg-info/requires.txt +0 -0
  21. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface.egg-info/top_level.txt +0 -0
  22. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/__init__.py +0 -0
  23. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/__init__.py +0 -0
  24. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/base_diffusers.py +0 -0
  25. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_image_diffusers.py +0 -0
  26. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/image_to_video_gen_xl_diffusers.py +0 -0
  27. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/inpainting_diffusers.py +0 -0
  28. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_diffusers/src/sinapsis_huggingface_diffusers/templates/text_to_image_diffusers.py +0 -0
  29. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/__init__.py +0 -0
  30. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/__init__.py +0 -0
  31. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/hugging_face_embedding_extractor.py +0 -0
  32. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/speaker_embedding_from_audio.py +0 -0
  33. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_embeddings/src/sinapsis_huggingface_embeddings/templates/speaker_embedding_from_dataset.py +0 -0
  34. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/__init__.py +0 -0
  35. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/__init__.py +0 -0
  36. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/helpers/grounding_dino_keys.py +0 -0
  37. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/__init__.py +0 -0
  38. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/grounding_dino.py +0 -0
  39. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_grounding_dino/src/sinapsis_huggingface_grounding_dino/templates/grounding_dino_classification.py +0 -0
  40. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/__init__.py +0 -0
  41. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/__init__.py +0 -0
  42. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/helpers/text_to_sentences.py +0 -0
  43. {sinapsis_huggingface-0.1.0 → sinapsis_huggingface-0.2.0}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sinapsis-huggingface
- Version: 0.1.0
+ Version: 0.2.0
  Summary: Package for HuggingFace-based templates
  Author-email: SinapsisAI <dev@sinapsis.tech>
  License: GNU AFFERO GENERAL PUBLIC LICENSE
@@ -822,22 +822,28 @@ The **Sinapsis web applications** provide an interactive way to explore and expe
  > [!IMPORTANT]
  > To run any of the apps, you first need to clone this repo:

+ ```bash
+ git clone git@github.com:Sinapsis-ai/sinapsis-huggingface.git
+ cd sinapsis-huggingface
+ ```
+
  > [!NOTE]
  > If you'd like to enable external app sharing in Gradio, `export GRADIO_SHARE_APP=True`

  > [!NOTE]
  > Agent configuration can be changed through the AGENT_CONFIG_PATH env var. You can check the available configurations in each package configs folder.

+ > [!IMPORTANT]
+ > Please make sure you have a valid Hugging Face access token in order to run the PaliGemma webapp. For instructions on how to create an access token, see https://huggingface.co/docs/transformers.js/en/guides/private
+
+

- ```bash
- git clone git@github.com:Sinapsis-ai/sinapsis-huggingface.git
- cd sinapsis-huggingface
- ```

  <details>
  <summary id="docker"><strong><span style="font-size: 1.4em;">🐳 Build with Docker</span></strong></summary>

- **IMPORTANT** The docker image depends on the sinapsis-nvidia:base image. To build it, refer to the [official sinapsis documentation]([https://](https://github.com/Sinapsis-ai/sinapsis?tab=readme-ov-file#docker)
+ **IMPORTANT** The docker image depends on the sinapsis-nvidia:base image. To build it, refer to the [official sinapsis documentation](https://github.com/Sinapsis-AI/sinapsis/blob/main/README.md#docker)


  1. **Build the sinapsis-huggingface image**:
@@ -845,17 +851,35 @@ cd sinapsis-huggingface
  docker compose -f docker/compose.yaml build
  ```
  2. **Start the container**:
+
+ For Diffusers app
  ```bash
  docker compose -f docker/compose_diffusers.yaml up sinapsis-huggingface-diffusers-gradio -d
  ```
- **NOTE**: There is also a service to deploy the vision app. To do so, use:
+ For Grounding-Dino app
  ```bash
  docker compose -f docker/compose_vision.yaml up sinapsis-huggingface-vision-gradio -d
  ```
+ For PaliGemma app
+
+ ```bash
+ export HF_TOKEN="your_huggingface_token"
+ docker compose -f docker/compose_pali_gemma.yaml up sinapsis-huggingface-paligemma-gradio -d
+ ```
  3. **Check the status**:
+
+ For Diffusers app
  ```bash
  docker logs -f sinapsis-huggingface-diffusers-gradio
  ```
+ For Grounding-Dino app
+ ```bash
+ docker logs -f sinapsis-huggingface-vision-gradio
+ ```
+ For PaliGemma app
+ ```bash
+ docker logs -f sinapsis-huggingface-paligemma-gradio
+ ```
  **NOTE**: If using the vision app, please change the name of the service accordingly

  4. **The logs will display the URL to access the webapp, e.g.,**:
@@ -865,9 +889,19 @@ Running on local URL: http://127.0.0.1:7860
  **NOTE**: The local URL can be different, please check the logs

  5. **To stop the app**:
+
+ For Diffusers app
  ```bash
  docker compose -f docker/compose_diffusers.yaml down
  ```
+ For Grounding-Dino app
+ ```bash
+ docker compose -f docker/compose_vision.yaml down
+ ```
+ For PaliGemma app
+ ```bash
+ docker compose -f docker/compose_pali_gemma.yaml down
+ ```
  </details>

  <details>
@@ -886,19 +920,23 @@ uv pip install sinapsis-huggingface[all] --extra-index-url https://pypi.sinapsis
  ```
  3. Run the webapp.

+ For Diffusers app
  ```bash
  uv run webapps/diffusers_demo.py
  ```
-
- 4. The terminal will display the URL to access the webapp, e.g., :
+ For Grounding-Dino app
  ```bash
- Running on local URL: http://127.0.0.1:7860
+ uv run webapps/vision_demo.py
+ ```
+ For PaliGemma app
+ ```bash
+ export HF_TOKEN="your_huggingface_token"
+ uv run webapps/paligemma_demo.py
  ```

- **NOTE**: If you want to try the vision app, in step 5 change the command to:
-
+ 4. The terminal will display the URL to access the webapp, e.g., :
  ```bash
- python webapps/vision_demo.py
+ Running on local URL: http://127.0.0.1:7860
  ```

  </details>
README.md
(Same content changes as the README portion of the PKG-INFO diff above, at this file's offsets: @@ -130,22 +130,28 @@, @@ -153,17 +159,35 @@, @@ -173,9 +197,19 @@, @@ -194,19 +228,23 @@.)
packages/sinapsis_huggingface.egg-info/PKG-INFO
(Identical to the PKG-INFO diff above: the same version bump and README changes.)
packages/sinapsis_huggingface.egg-info/SOURCES.txt
@@ -33,4 +33,9 @@ packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers
  packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py
  packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py
  packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py
- packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py
+ packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py
+ packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_base.py
+ packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_detection.py
+ packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_inference.py
+ packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/__init__.py
+ packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/helpers.py
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/__init__.py
@@ -6,6 +6,9 @@ _root_lib_path = "sinapsis_huggingface_transformers.templates"

  _template_lookup = {
      "ImageToTextTransformers": f"{_root_lib_path}.image_to_text_transformers",
+     "PaliGemmaDetection": f"{_root_lib_path}.pali_gemma.pali_gemma_detection",
+     "PaliGemmaInference": f"{_root_lib_path}.pali_gemma.pali_gemma_inference",
+     "PaliGemmaSegmentation": f"{_root_lib_path}.pali_gemma.pali_gemma_segmentation",
      "SpeechToTextTransformers": f"{_root_lib_path}.speech_to_text_transformers",
      "SummarizationTransformers": f"{_root_lib_path}.summarization_transformers",
      "TextToSpeechTransformers": f"{_root_lib_path}.text_to_speech_transformers",
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/base_transformers.py
@@ -63,6 +63,17 @@ class TransformersBase(Template):
          self._TORCH_DTYPE = {"float16": torch.float16, "float32": torch.float32}
          self.task: str | None = None
          self._set_seed()
+
+     def setup_pipeline(self) -> None:
+         """Initialize and configure the HuggingFace Transformers processing pipeline.
+
+         Raises:
+             ValueError: If called before the task attribute is set. The task must be
+                 defined by the child class before pipeline initialization.
+         """
+         if self.task is None:
+             raise ValueError("'task' must be assigned before pipeline setup")
+
          self.processor = self._initialize_processor()
          self.pipeline = self.initialize_pipeline()
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/image_to_text_transformers.py
@@ -38,6 +38,7 @@ class ImageToTextTransformers(TransformersBase):
      def __init__(self, attributes: TemplateAttributeType) -> None:
          super().__init__(attributes)
          self.task = "image-to-text"
+         self.setup_pipeline()

      @staticmethod
      def _convert_to_pil(image_content: Image.Image | np.ndarray) -> Image.Image:
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_base.py (new file)
@@ -0,0 +1,97 @@
+ # -*- coding: utf-8 -*-
+ from abc import abstractmethod
+ from typing import Any, ClassVar, Literal
+
+ import torch
+ from sinapsis_core.data_containers.data_packet import DataContainer
+ from sinapsis_core.template_base import (
+     Template,
+     TemplateAttributes,
+     TemplateAttributeType,
+ )
+ from sinapsis_core.utils.env_var_keys import SINAPSIS_CACHE_DIR
+ from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
+
+
+ class PaliGemmaBaseAttributes(TemplateAttributes):
+     """Base attributes for PaliGemma models.
+
+     Attributes:
+         model_path (str): Path to the pretrained PaliGemma model. Can be either:
+             - A Hugging Face model identifier (e.g. 'google/paligemma-3b-mix-224')
+             - A local directory path containing the model files
+         processor_path (str): Path to the model processor/tokenizer. Can be either:
+             - A Hugging Face model identifier
+             - A local directory path containing the processor files
+         model_cache_dir (str): Directory for caching model files when downloading from Hugging Face.
+         device (Literal["cuda", "cpu"]): Device to run the model on. Defaults to cpu.
+         max_new_tokens (int): Maximum number of tokens to generate. Defaults to 200.
+         torch_dtype (Literal["float16", "float32"]): Model precision type. Defaults to float16.
+     """
+
+     model_path: str
+     processor_path: str
+     model_cache_dir: str = str(SINAPSIS_CACHE_DIR)
+     device: Literal["cuda", "cpu"] = "cpu"
+     max_new_tokens: int = 200
+     torch_dtype: Literal["float16", "float32"] = "float16"
+
+
+ class PaliGemmaBase(Template):
+     """Base class for PaliGemma implementations."""
+
+     AttributesBaseModel = PaliGemmaBaseAttributes
+     CATEGORY = "Transformers"
+     _TORCH_DTYPE: ClassVar[dict[str, Any]] = {"float16": torch.float16, "float32": torch.float32}
+
+     def __init__(self, attributes: TemplateAttributeType) -> None:
+         super().__init__(attributes)
+         self.model = self._setup_model()
+         self.processor = self._setup_processor()
+
+     def _setup_model(
+         self,
+     ) -> PaliGemmaForConditionalGeneration:
+         """Initialize the model with proper device placement and precision settings.
+
+         Handles the loading of model components, configuring them
+         according to the specified device and precision requirements.
+
+         Returns:
+             PaliGemmaForConditionalGeneration: Initialized and configured model.
+         """
+
+         model = PaliGemmaForConditionalGeneration.from_pretrained(
+             self.attributes.model_path,
+             cache_dir=self.attributes.model_cache_dir,
+             torch_dtype=self._TORCH_DTYPE.get(self.attributes.torch_dtype),
+         ).to(self.attributes.device)
+
+         return model
+
+     def _setup_processor(self) -> AutoProcessor:
+         """Initialize the processor with the proper cache and precision settings.
+
+         Handles the loading of processor components, configuring them
+         according to the specified cache and precision requirements.
+
+         Returns:
+             AutoProcessor: Initialized and configured processor.
+         """
+         processor = AutoProcessor.from_pretrained(
+             self.attributes.processor_path,
+             cache_dir=self.attributes.model_cache_dir,
+             torch_dtype=self._TORCH_DTYPE.get(self.attributes.torch_dtype),
+         )
+         return processor
+
+     @abstractmethod
+     def execute(self, container: DataContainer) -> DataContainer:
+         """Execute method to be implemented by child classes.
+
+         Args:
+             container (DataContainer): The input data container to be processed.
+
+         Returns:
+             DataContainer: The processed container with model outputs.
+         """
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_detection.py (new file)
@@ -0,0 +1,124 @@
+ # -*- coding: utf-8 -*-
+ from dataclasses import dataclass
+
+ from sinapsis_core.data_containers.annotations import ImageAnnotations
+ from sinapsis_core.template_base import TemplateAttributeType
+ from sinapsis_huggingface_transformers.templates.pali_gemma.pali_gemma_inference import (
+     PaliGemmaInference,
+     PaliGemmaInferenceAttributes,
+ )
+ from sinapsis_huggingface_transformers.thirdparty.helpers import (
+     get_matches,
+     parse_label,
+     parse_location_tokens,
+ )
+
+
+ @dataclass(frozen=True)
+ class PaliGemmaDetectionKeys:
+     """Keys to use during detection."""
+
+     detection_prompt: str = "detect {}"
+
+
+ class PaliGemmaDetectionAttributes(PaliGemmaInferenceAttributes):
+     """Configuration attributes for PaliGemma object detection tasks.
+
+     This class extends the base inference attributes to handle detection-specific configuration.
+
+     Attributes:
+         objects_to_detect (str | list[str]): Target objects to detect; can be a single string or a list of strings.
+     """
+
+     objects_to_detect: str | list[str]
+
+
+ class PaliGemmaDetection(PaliGemmaInference):
+     """Implementation of the PaliGemma object detection pipeline.
+
+     The template inherits functionality from its base class, extending
+     it to run inference on an image and identify the objects
+     specified in the attributes.
+
+     Usage example:
+
+     agent:
+       name: my_test_agent
+       templates:
+       - template_name: InputTemplate
+         class_name: InputTemplate
+         attributes: {}
+       - template_name: PaliGemmaDetection
+         class_name: PaliGemmaDetection
+         template_input: InputTemplate
+         attributes:
+           model_path: '/path/to/paligemma/model'
+           processor_path: '/path/to/processor'
+           model_cache_dir: /path/to/cache/dir
+           device: 'cuda'
+           max_new_tokens: 200
+           torch_dtype: float16
+           prompt: <image> caption en
+           objects_to_detect: 'object to detect'
+
+     """
+
+     AttributesBaseModel = PaliGemmaDetectionAttributes
+     KEYS = PaliGemmaDetectionKeys
+
+     def __init__(self, attributes: TemplateAttributeType) -> None:
+         super().__init__(attributes)
+
+         objects_str = self.initialize_objects_str()
+         self.prompt = self.KEYS.detection_prompt.format(objects_str)
+
+     def initialize_objects_str(self) -> str:
+         """Initialize the objects-to-detect string according to the expected format.
+
+         Returns:
+             str: String listing the objects to be included in the detection prompt.
+         """
+
+         if isinstance(self.attributes.objects_to_detect, str):
+             return self.attributes.objects_to_detect
+         return "; ".join(self.attributes.objects_to_detect)
+
+     def _format_text_for_prompt(self, text: str) -> str:
+         """Formats input text as a detection prompt.
+
+         Args:
+             text (str): Raw text content (expected to be objects to detect)
+
+         Returns:
+             str: Formatted detection prompt
+         """
+         return self.KEYS.detection_prompt.format(text)
+
+     def _create_annotation(
+         self, caption: str, confidence: float, image_shape: tuple[int, ...]
+     ) -> list[ImageAnnotations]:
+         """Creates structured annotations from detection model outputs.
+
+         Processes the model's output caption to extract bounding box coordinates
+         and object labels for each detected instance.
+
+         Args:
+             caption (str): Raw detection output from the model
+             confidence (float): Confidence score for the predictions
+             image_shape (tuple[int, ...]): Dimensions of the input image (height, width)
+
+         Returns:
+             list[ImageAnnotations]: List of annotations containing bounding boxes and labels
+                 for each detected object
+         """
+         annotations = []
+         matches = get_matches(caption)
+
+         for match_coord in matches:
+             coords = parse_location_tokens(match_coord, image_shape)
+             label = parse_label(match_coord)
+             annotation = self.create_bbox_annotation(coords, label, confidence)
+             annotations.append(annotation)
+
+         return annotations
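To make the prompt construction concrete: a single string passes through `initialize_objects_str` unchanged, while a list is joined with `"; "` before being interpolated into `detect {}`. A quick illustration of the string logic above:

```python
# String logic mirrored from PaliGemmaDetection.__init__/initialize_objects_str.
objects_to_detect = ["cat", "dog"]
objects_str = "; ".join(objects_to_detect)   # -> "cat; dog"
prompt = "detect {}".format(objects_str)     # -> "detect cat; dog"
```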
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/pali_gemma/pali_gemma_inference.py (new file)
@@ -0,0 +1,260 @@
+ # -*- coding: utf-8 -*-
+ import numpy as np
+ import torch
+ from sinapsis_core.data_containers.annotations import BoundingBox, ImageAnnotations
+ from sinapsis_core.data_containers.data_packet import DataContainer, ImagePacket
+ from sinapsis_core.template_base import TemplateAttributeType
+ from sinapsis_data_visualization.helpers.detection_utils import bbox_xyxy_to_xywh
+ from sinapsis_huggingface_transformers.templates.pali_gemma.pali_gemma_base import (
+     PaliGemmaBase,
+     PaliGemmaBaseAttributes,
+ )
+ from transformers.generation.utils import GenerateOutput
+
+
+ class PaliGemmaInferenceAttributes(PaliGemmaBaseAttributes):
+     """Configuration attributes for PaliGemma inference.
+
+     Attributes:
+         prompt (str): Prompt to run the inference (default: "<image>caption en")
+
+     The <image> token is essential: it serves as a marker that tells the model where to look at the image
+     when processing the input. This token enables the model to understand the relationship between the visual
+     and textual components during processing.
+
+     Example prompts:
+         - "<image>caption en" -> Generates a basic caption in English
+         - "<image>What objects can you see in this image?" -> Lists objects in the image
+     """
+
+     prompt: str = "<image>caption en"
+
+
+ class PaliGemmaInference(PaliGemmaBase):
+     """Implementation of the PaliGemma inference pipeline for image processing and caption generation.
+
+     This class handles the inference process for PaliGemma models, including image processing,
+     caption generation, and annotation creation. It supports both basic captioning and
+     detection/segmentation tasks.
+
+     Usage example:
+
+     agent:
+       name: my_test_agent
+       templates:
+       - template_name: InputTemplate
+         class_name: InputTemplate
+         attributes: {}
+       - template_name: PaliGemmaInference
+         class_name: PaliGemmaInference
+         template_input: InputTemplate
+         attributes:
+           model_path: '/path/to/paligemma/model'
+           processor_path: '/path/to/processor'
+           model_cache_dir: /path/to/cache/dir
+           device: 'cuda'
+           max_new_tokens: 200
+           torch_dtype: float16
+           prompt: <image> caption en
+
+     """
+
+     AttributesBaseModel = PaliGemmaInferenceAttributes
+     INPUT_IDS = "input_ids"
+
+     def __init__(self, attributes: TemplateAttributeType) -> None:
+         super().__init__(attributes)
+         self.prompt = self.attributes.prompt
+
+     def _prepare_inputs(self, image_content: np.ndarray) -> dict:
+         """Prepares the input for model inference by processing the image and text prompt.
+
+         Args:
+             image_content (np.ndarray): Raw image content to be processed, as a numpy array
+
+         Returns:
+             dict: Processed inputs containing:
+                 - input_ids (torch.Tensor): Token IDs for the text prompt and image tokens
+                 - attention_mask (torch.Tensor): Binary mask indicating valid input positions (1s)
+                 - pixel_values (torch.Tensor): Processed image tensor with normalized pixel values
+                   in shape (batch_size, channels, height, width)
+
+         Note:
+             - The returned values are PyTorch tensors because the processor is called with return_tensors="pt"
+         """
+
+         return self.processor(
+             images=image_content,
+             text=self.prompt,
+             return_tensors="pt",
+         ).to(self.attributes.device)
+
+     def _generate_caption(self, inputs: dict) -> GenerateOutput:
+         """Generates a caption using the model.
+
+         Args:
+             inputs (dict): Processed model inputs, including the input IDs of the image and prompt
+
+         Returns:
+             GenerateOutput: A structured output containing:
+                 - sequences: tensor with token IDs of the generated sequence
+                 - scores: tuple of tensors with token prediction scores for each generation step
+                 - logits: optional tensor with raw logits (None in this configuration)
+                 - attentions: optional attention weights (None in this configuration)
+                 - hidden_states: optional hidden states (None in this configuration)
+                 - past_key_values: tuple of tensors containing past keys/values for the attention mechanism
+
+         Configuration parameters:
+             - max_new_tokens: Maximum number of new tokens to generate
+             - return_dict_in_generate: Returns output as a structured dictionary
+             - output_scores: Includes prediction scores in the output
+         """
+         with torch.no_grad():
+             return self.model.generate(
+                 **inputs,
+                 max_new_tokens=self.attributes.max_new_tokens,
+                 return_dict_in_generate=True,
+                 output_scores=True,
+             )
+
+     @staticmethod
+     def _calculate_confidence_score(outputs: GenerateOutput) -> float:
+         """Calculates the confidence score from model generation outputs.
+
+         The confidence score is computed as the mean of the highest probability
+         for each generated token in the sequence.
+
+         Args:
+             outputs (GenerateOutput): Model generation output containing scores
+                 for each generated token
+
+         Returns:
+             float: Average confidence score across all generated tokens
+         """
+         scores = torch.stack(outputs.scores)
+         probs = torch.softmax(scores, dim=-1)
+         token_confidences = torch.max(probs, dim=-1).values
+         return float(torch.mean(token_confidences).cpu())
+
+     def _decode_caption(self, outputs: GenerateOutput, input_len: int) -> str:
+         """Decodes the model output sequences into readable caption text.
+
+         Args:
+             outputs (GenerateOutput): Model generation output containing the
+                 generated token sequences
+             input_len (int): Length of the input sequence to skip initial tokens
+
+         Returns:
+             str: Decoded caption text with special tokens removed
+         """
+         return self.processor.decode(outputs.sequences[0][input_len:], skip_special_tokens=True)
+
+     def _create_annotation(
+         self, caption: str, confidence: float, image_shape: tuple[int, ...]
+     ) -> list[ImageAnnotations]:
+         """Creates image annotations from the generated caption.
+
+         Args:
+             caption (str): Generated caption text
+             confidence (float): Confidence score for the prediction
+             image_shape (tuple[int, ...]): Shape of the input image
+
+         Returns:
+             list[ImageAnnotations]: List containing an annotation with the caption information
+         """
+
+         _, _ = self, image_shape
+         return [ImageAnnotations(text=caption, confidence_score=confidence)]
+
+     def _process_single_image(self, image_packet: ImagePacket) -> None:
+         """Processes a single image through the inference pipeline.
+
+         Args:
+             image_packet (ImagePacket): Container with image data and metadata
+
+         Returns:
+             None: Modifies the image_packet in place by adding annotations
+         """
+         inputs = self._prepare_inputs(image_packet.content)
+         outputs = self._generate_caption(inputs)
+         input_len = inputs[self.INPUT_IDS].shape[-1]
+         caption = self._decode_caption(outputs, input_len)
+         confidence = self._calculate_confidence_score(outputs)
+         annotations = self._create_annotation(caption, confidence, image_packet.content.shape)
+         image_packet.annotations.extend(annotations)
+
+     def _format_text_for_prompt(self, text: str) -> str:
+         """Formats the incoming text appropriately for the current task type.
+
+         The base implementation returns the text as-is; subclasses may override it
+         to apply task-specific formatting.
+
+         Args:
+             text (str): Raw text content
+
+         Returns:
+             str: Formatted prompt text
+         """
+         _ = self
+         return text
+
+     def process_from_text_packet(self, container: DataContainer) -> None:
+         """Extract prompts from the received list of text packets and use them to run
+         inference on each received image packet.
+
+         Args:
+             container (DataContainer): Data container with text and image packets to be processed.
+         """
+         for text_packet in container.texts:
+             self.prompt = self._format_text_for_prompt(text_packet.content)
+             if container.images:
+                 for image_packet in container.images:
+                     self._process_single_image(image_packet)
+
+     def process_from_prompt(self, container: DataContainer) -> None:
+         """Run inference on each received image packet using the prompt defined in the template attributes.
+
+         Args:
+             container (DataContainer): Data container with image packets to be processed.
+         """
+         if container.images:
+             for image_packet in container.images:
+                 self._process_single_image(image_packet)
+
+     def execute(self, container: DataContainer) -> DataContainer:
+         """Executes the inference pipeline on a batch of images.
+
+         If text packets are present, uses each text as input for prompt formatting.
+         If no text packets exist, uses the default prompt from attributes.
+
+         Args:
+             container (DataContainer): Container with text and image packets
+
+         Returns:
+             DataContainer: Processed container with added annotations
+         """
+         if container.texts:
+             self.process_from_text_packet(container)
+         else:
+             self.process_from_prompt(container)
+
+         return container
+
+     @staticmethod
+     def create_bbox_annotation(coords: tuple[float, ...], label: str, confidence: float) -> ImageAnnotations:
+         """Creates a bounding box annotation from coordinates and metadata.
+
+         Args:
+             coords (tuple[float, ...]): Coordinates (x0, y0, x1, y1)
+             label (str): Label for the detected object
+             confidence (float): Confidence score for the detection
+
+         Returns:
+             ImageAnnotations: Annotation object with bounding box information
+         """
+         x0, y0, x1, y1 = coords
+         x, y, w, h = bbox_xyxy_to_xywh([x0, y0, x1, y1])
+         return ImageAnnotations(
+             label_str=label,
+             confidence_score=confidence,
+             bbox=BoundingBox(x=x, y=y, w=w, h=h),
+         )
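The confidence heuristic in `_calculate_confidence_score` is simply the mean, over generation steps, of each step's top softmax probability. A toy check with made-up scores (two steps, vocabulary of three, batch of one):

```python
import torch

# Fake generation scores: a tuple of (batch=1, vocab=3) tensors, one per step,
# mimicking GenerateOutput.scores.
fake_scores = (torch.tensor([[2.0, 0.5, 0.1]]), torch.tensor([[0.3, 1.7, 0.2]]))
scores = torch.stack(fake_scores)                     # shape (2, 1, 3)
probs = torch.softmax(scores, dim=-1)
token_confidences = torch.max(probs, dim=-1).values   # top probability per step
print(float(torch.mean(token_confidences).cpu()))     # mean of the two maxima
```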
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/speech_to_text_transformers.py
@@ -39,6 +39,7 @@ class SpeechToTextTransformers(TransformersBase):
      def __init__(self, attributes: TemplateAttributeType) -> None:
          super().__init__(attributes)
          self.task = "automatic-speech-recognition"
+         self.setup_pipeline()

      def transformation_method(self, container: DataContainer) -> DataContainer:
          """Speech recognition (speech-to-text) using a Transformers Pipeline.
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/summarization_transformers.py
@@ -38,6 +38,7 @@ class SummarizationTransformers(TransformersBase):
      def __init__(self, attributes: TemplateAttributeType) -> None:
          super().__init__(attributes)
          self.task = "summarization"
+         self.setup_pipeline()

      def transformation_method(self, container: DataContainer) -> DataContainer:
          """Summarize text using a Transformers Pipeline.
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/text_to_speech_transformers.py
@@ -64,8 +64,9 @@ class TextToSpeechTransformers(TransformersBase):

      def __init__(self, attributes: TemplateAttributeType) -> None:
          super().__init__(attributes)
-         self.sample_rate = self._get_sample_rate()
          self.task = "text-to-speech"
+         self.setup_pipeline()
+         self.sample_rate = self._get_sample_rate()

      def _get_sample_rate(self) -> int:
          """Retrieve the sample rate for the generated audio.
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/templates/translation_transformers.py
@@ -56,6 +56,7 @@ class TranslationTransformers(TransformersBase):
      def __init__(self, attributes: TemplateAttributeType) -> None:
          super().__init__(attributes)
          self.task = f"translation_{self.attributes.source_language}_to_{self.attributes.target_language}"
+         self.setup_pipeline()

      def transformation_method(self, container: DataContainer) -> DataContainer:
          """Translate text using a Transformers Pipeline.
packages/sinapsis_huggingface_transformers/src/sinapsis_huggingface_transformers/thirdparty/helpers.py (new file)
@@ -0,0 +1,70 @@
+ # -*- coding: utf-8 -*-
+ """
+ The constants and methods declared in this file are inspired by the following source:
+
+ https://github.com/google/generative-ai-docs/blob/main/site/en/gemma/docs/paligemma/inference-with-keras.ipynb
+
+ which is licensed under the Apache License, Version 2.0.
+
+ """
+
+ import numpy as np
+ import regex as re
+
+ COORDS_PATTERN: str = r"<loc(?P<y0>\d\d\d\d)><loc(?P<x0>\d\d\d\d)><loc(?P<y1>\d\d\d\d)><loc(?P<x1>\d\d\d\d)>"
+ LABEL_PATTERN: str = r" (?P<label>.+?)( ;|$)"
+
+ DETECTION_PATTERN: str = COORDS_PATTERN + LABEL_PATTERN
+
+ LOCATION_KEYS: tuple[str, ...] = ("y0", "x0", "y1", "x1")
+ LOCATION_SCALE: float = 1024.0
+
+
+ def parse_location_tokens(match_coord: re.Match, image_shape: tuple[int, ...]) -> np.ndarray:
+     """Parses location tokens from the model output into pixel coordinates.
+
+     Args:
+         match_coord (re.Match): Regex match containing the location tokens
+         image_shape (tuple[int, ...]): Shape of the input image
+
+     Returns:
+         np.ndarray: Pixel coordinates (x0, y0, x1, y1), scaled to the image dimensions
+     """
+     match_dict = match_coord.groupdict()
+     x0 = float(match_dict[LOCATION_KEYS[1]]) / LOCATION_SCALE * image_shape[1]
+     y0 = float(match_dict[LOCATION_KEYS[0]]) / LOCATION_SCALE * image_shape[0]
+     x1 = float(match_dict[LOCATION_KEYS[3]]) / LOCATION_SCALE * image_shape[1]
+     y1 = float(match_dict[LOCATION_KEYS[2]]) / LOCATION_SCALE * image_shape[0]
+     return np.array([x0, y0, x1, y1])
+
+
+ def parse_label(match_coord: re.Match) -> str:
+     """Retrieves the detection label from a regex Match object.
+
+     Args:
+         match_coord (re.Match): The Match object containing the label information.
+
+     Returns:
+         str: The detection label.
+     """
+     label = match_coord.groupdict().get("label")
+     if label is None:
+         return ""
+     return label.strip()
+
+
+ def get_matches(caption: str) -> re.Scanner:
+     """Creates an iterable containing all the detection matches found in the
+     produced model caption.
+
+     Args:
+         caption (str): The caption produced by the PaliGemma model.
+
+     Returns:
+         Scanner: An iterable object containing all the regex matches.
+     """
+
+     return re.finditer(DETECTION_PATTERN, caption)
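PaliGemma encodes boxes as four `<locNNNN>` tokens in (y0, x0, y1, x1) order on a 0-1023 grid; these helpers rescale them by the image dimensions. A round-trip check on a hypothetical 448×448 image, using only the functions defined above (token values are made up for illustration):

```python
# Example values only; <loc....> tokens lie on PaliGemma's 0-1023 grid.
caption = "<loc0256><loc0128><loc0768><loc0896> cat"
match = list(get_matches(caption))[0]
print(parse_label(match))                        # -> "cat"
print(parse_location_tokens(match, (448, 448)))
# -> [ 56. 112. 392. 336.]   i.e. token / 1024 * image dimension
```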
pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "sinapsis-huggingface"
- version = "0.1.0"
+ version = "0.2.0"
  description = "Package for HuggingFace-based templates"
  authors = [
      {name = "SinapsisAI", email = "dev@sinapsis.tech"},