llama-stack 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as they were released to one of the supported registries. It is provided for informational purposes only.
Files changed (159)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. llama_stack/{distributions/meta-reference-gpu → core/connectors}/__init__.py +3 -1
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  30. llama_stack/distributions/nvidia/config.yaml +4 -1
  31. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  32. llama_stack/distributions/oci/config.yaml +4 -1
  33. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  34. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  35. llama_stack/distributions/starter/build.yaml +62 -0
  36. llama_stack/distributions/starter/config.yaml +22 -3
  37. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  38. llama_stack/distributions/starter/starter.py +13 -1
  39. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  40. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  41. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  42. llama_stack/distributions/template.py +10 -2
  43. llama_stack/distributions/watsonx/config.yaml +4 -1
  44. llama_stack/log.py +1 -0
  45. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  46. llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
  47. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +53 -51
  48. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
  49. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  50. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  51. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  52. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  53. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  54. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  55. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  56. llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
  57. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  58. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  59. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  60. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
  61. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  62. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  63. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  64. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  65. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  66. llama_stack/providers/registry/agents.py +1 -0
  67. llama_stack/providers/registry/inference.py +1 -9
  68. llama_stack/providers/registry/vector_io.py +136 -16
  69. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  70. llama_stack/providers/remote/files/s3/config.py +5 -3
  71. llama_stack/providers/remote/files/s3/files.py +2 -2
  72. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  73. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  74. llama_stack/providers/remote/inference/together/together.py +4 -0
  75. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  76. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  77. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  78. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  79. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  80. llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
  81. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  82. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  83. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  84. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  85. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  86. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  87. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  88. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  89. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  90. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  91. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  92. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  93. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  94. llama_stack/providers/utils/bedrock/client.py +3 -3
  95. llama_stack/providers/utils/bedrock/config.py +7 -7
  96. llama_stack/providers/utils/inference/__init__.py +0 -25
  97. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  98. llama_stack/providers/utils/inference/http_client.py +239 -0
  99. llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
  100. llama_stack/providers/utils/inference/model_registry.py +148 -2
  101. llama_stack/providers/utils/inference/openai_compat.py +1 -158
  102. llama_stack/providers/utils/inference/openai_mixin.py +42 -2
  103. llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
  104. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  105. llama_stack/providers/utils/memory/vector_store.py +46 -19
  106. llama_stack/providers/utils/responses/responses_store.py +7 -7
  107. llama_stack/providers/utils/safety.py +114 -0
  108. llama_stack/providers/utils/tools/mcp.py +44 -3
  109. llama_stack/testing/api_recorder.py +9 -3
  110. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
  111. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/RECORD +115 -148
  112. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  113. llama_stack/distributions/meta-reference-gpu/doc_template.md +0 -119
  114. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  115. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  116. llama_stack/models/llama/hadamard_utils.py +0 -88
  117. llama_stack/models/llama/llama3/args.py +0 -74
  118. llama_stack/models/llama/llama3/dog.jpg +0 -0
  119. llama_stack/models/llama/llama3/generation.py +0 -378
  120. llama_stack/models/llama/llama3/model.py +0 -304
  121. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  122. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  123. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  124. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  125. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  126. llama_stack/models/llama/llama3/pasta.jpeg +0 -0
  127. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  128. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  129. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  130. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  131. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  132. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  133. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  134. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  135. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  136. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  137. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  138. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  139. llama_stack/models/llama/llama4/args.py +0 -107
  140. llama_stack/models/llama/llama4/ffn.py +0 -58
  141. llama_stack/models/llama/llama4/moe.py +0 -214
  142. llama_stack/models/llama/llama4/preprocess.py +0 -435
  143. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  144. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  145. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  146. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  147. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  148. llama_stack/models/llama/quantize_impls.py +0 -316
  149. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  150. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  151. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  152. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  153. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  154. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  155. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  156. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
  157. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
  158. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
  159. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/top_level.txt +0 -0
llama_stack/models/llama/llama3/multimodal/image_transform.py
@@ -1,409 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # the root directory of this source tree.
-
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- #
- # This source code is licensed under the terms described in the LICENSE file in
- # top-level folder for each specific model found within the models/ directory at
- # the top-level of this source tree.
-
- import math
- from collections import defaultdict
- from typing import Any
-
- import torch
- import torchvision.transforms as tv
- from PIL import Image
- from torchvision.transforms import functional as F
-
- from llama_stack.log import get_logger
-
- IMAGE_RES = 224
-
- logger = get_logger(name=__name__, category="models::llama")
-
-
- class VariableSizeImageTransform:
-     """
-     This class accepts images of any size and dynamically resizes, pads, and chunks them
-     based on the image aspect ratio and the number of image chunks we allow.
-
-     The algorithm will NOT distort the image to fit a certain aspect ratio, because
-     that leads to a significant degradation in image quality.
-
-     It can be summarized in 6 steps:
-     1. Find all possible canvas combinations of max_num_chunks;
-     2. Find the best canvas to fit the image;
-     3. Resize without distortion
-     4. Pad
-     5. Normalize
-     6. Chunk
-
-     For example, if an input image is of size 300x800, patch_size of 224,
-     and max_num_chunks = 8, it will find the closest aspect ratio that
-     is allowed within 8 image chunks, with some restrictions.
-     In this case, 2:4 = 2 horizontal patches and 4 vertical patches,
-     giving a total of 8 chunks.
-
-     If resize_to_max_canvas, the image will be resized (without distortion),
-     to the largest possible resolution. In this case, 388:896, and padded to 448:896,
-     where we maintain the original aspect ratio and pad with zero values for the rest.
-     This approach minimizes the amount of padding required for any arbitrary resolution.
-
-     However, if limit_upscaling_to_patch_size is set to True,
-     the upscaling will be limited to the patch size. In the example above,
-     the image would remain 300x800 (no upscaling), and then padded to 448:896.
-
-     The final output will therefore be of shape (8, 3, 224, 224), where 2x4
-     patches are coming from the resizing and chunking.
-     """
-
-     def __init__(self, size: int = IMAGE_RES) -> None:
-         self.size = size
-         logger.info(f"VariableSizeImageTransform size: {self.size}")
-         self.to_tensor = tv.ToTensor()
-         self._mean = (0.48145466, 0.4578275, 0.40821073)
-         self._std = (0.26862954, 0.26130258, 0.27577711)
-         self.normalize = tv.Normalize(
-             mean=self._mean,
-             std=self._std,
-             inplace=True,
-         )
-         self.resample = tv.InterpolationMode.BILINEAR
-
-     @staticmethod
-     def get_factors(n: int) -> set[int]:
-         """
-         Calculate all factors of a given number, i.e. a divisor that leaves
-         no remainder. For example, if n=12, it will return {1, 2, 3, 4, 6, 12}.
-
-         Args:
-             n (int): The number to find factors for.
-
-         Returns:
-             set: A set containing all factors of the number.
-         """
-         factors_set = set()
-
-         for i in range(1, int(n**0.5) + 1):
-             if n % i == 0:
-                 factors_set.add(i)
-                 factors_set.add(n // i)
-         return factors_set
-
-     def find_supported_resolutions(self, max_num_chunks: int, patch_size: int) -> list[tuple[int, int]]:
-         """
-         Computes all of the allowed resolutions for a fixed number of chunks
-         and patch_size. Useful for when dividing an image into chunks.
-
-         Args:
-             max_num_chunks (int): Maximum number of chunks for processing.
-             patch_size (int): Size of the side of the patch.
-
-         Returns:
-             torch.Tensor: List of possible resolutions as tuples (height, width).
-
-         Example:
-             >>> max_num_chunks = 5
-             >>> patch_size = 224
-             >>> find_supported_resolutions(max_num_chunks, patch_size)
-             tensor([(224, 896), (448, 448), (224, 224), (896, 224), (224, 672),
-                     (672, 224), (224, 448), (448, 224)])
-
-             Given max_num_chunks=4, patch_size=224, it will create a dictionary:
-             {
-                 0.25: [(1, 4)],
-                 1.0: [(2, 2), (1, 1)],
-                 4.0: [(4, 1)],
-                 0.33: [(1, 3)],
-                 3.0: [(3, 1)],
-                 0.5: [(1, 2)],
-                 2.0: [(2, 1)]
-             }
-
-             and return the resolutions multiplied by the patch_size:
-             [(1*224, 4*224), (2*224, 2*224), ..., (2*224, 1*224)]
-         """
-         asp_dict = defaultdict(list)
-         for chunk_size in range(max_num_chunks, 0, -1):
-             _factors = sorted(self.get_factors(chunk_size))
-             _asp_ratios = [(factor, chunk_size // factor) for factor in _factors]
-             for height, width in _asp_ratios:
-                 ratio_float = height / width
-                 asp_dict[ratio_float].append((height, width))
-
-         # get the resolutions multiplied by the patch_size
-         possible_resolutions = []
-         for value in asp_dict.values():
-             for height, depth in value:
-                 possible_resolutions.append((height * patch_size, depth * patch_size))
-
-         return possible_resolutions
-
-     @staticmethod
-     def get_max_res_without_distortion(
-         image_size: tuple[int, int],
-         target_size: tuple[int, int],
-     ) -> tuple[int, int]:
-         """
-         Determines the maximum resolution to which an image can be resized without distorting its
-         aspect ratio, based on the target resolution.
-
-         Args:
-             image_size (Tuple[int, int]): The original resolution of the image (height, width).
-             target_resolution (Tuple[int, int]): The desired resolution to fit the image into (height, width).
-         Returns:
-             Tuple[int, int]: The optimal dimensions (height, width) to which the image should be resized.
-         Example:
-             >>> _get_max_res_without_distortion([200, 300], target_size = [450, 200])
-             (134, 200)
-             >>> _get_max_res_without_distortion([800, 600], target_size = [450, 1300])
-             (450, 338)
-         """
-
-         original_width, original_height = image_size
-         target_width, target_height = target_size
-
-         scale_w = target_width / original_width
-         scale_h = target_height / original_height
-
-         if scale_w < scale_h:
-             new_width = target_width
-             new_height = min(math.floor(original_height * scale_w), target_height)
-         else:
-             new_height = target_height
-             new_width = min(math.floor(original_width * scale_h), target_width)
-
-         return new_width, new_height
-
-     def _pad(self, image: Image.Image, target_size) -> Image.Image:
-         new_width, new_height = target_size
-         new_im = Image.new(mode="RGB", size=(new_width, new_height), color=(0, 0, 0))  # type: ignore
-         new_im.paste(image)
-         return new_im
-
-     def _split(self, image: torch.Tensor, ncw: int, nch: int) -> torch.Tensor:
-         # Split image into number of required tiles (width x height)
-         num_channels, height, width = image.size()
-         image = image.view(num_channels, nch, height // nch, ncw, width // ncw)
-         # Permute dimensions to reorder the axes
-         image = image.permute(1, 3, 0, 2, 4).contiguous()
-         # Reshape into the desired output shape (batch_size * 4, num_channels, width/2, height/2)
-         image = image.view(ncw * nch, num_channels, height // nch, width // ncw)
-         return image
-
-     def resize_without_distortion(
-         self,
-         image: Image.Image,
-         target_size: tuple[int, int],
-         max_upscaling_size: int | None,
-     ) -> Image.Image:
-         """
-         Used to resize an image to target_resolution, without distortion.
-
-         If target_size requires upscaling the image, the user can set max_upscaling_size to
-         limit the upscaling to a maximum size. In this case, since we rescale without distortion,
-         modifying target_size works as a boundary for the image's largest side.
-
-         Args:
-             resample (str): Resampling method used when resizing images.
-                 Supports "nearest", "nearest_exact", "bilinear", "bicubic".
-             max_upscaling_size (int): The maximum size to upscale the image to.
-                 If None, there is no limit.
-         Examples:
-             >>> target_size = (1000, 1200)
-             >>> max_upscaling_size = 600
-             >>> image_size = (400, 200)
-             >>> resize_without_distortion(image_size, target_size, max_upscaling_size)
-             (600, 300)  # new_size_without_distortion
-
-             >>> target_size = (1000, 1200)
-             >>> max_upscaling_size = 600
-             >>> image_size = (2000, 200)
-             >>> resize_without_distortion(image_size, target_size, max_upscaling_size)
-             (1000, 100)  # new_size_without_distortion
-
-             >>> target_size = (1000, 1200)
-             >>> max_upscaling_size = 2000
-             >>> image_size = (400, 200)
-             >>> resize_without_distortion(image_size, target_size, max_upscaling_size)
-             (1000, 500)  # new_size_without_distortion
-
-             >>> target_size = (1000, 1200)
-             >>> max_upscaling_size = None
-             >>> image_size = (400, 200)
-             >>> resize_without_distortion(image_size, target_size, max_upscaling_size)
-             (1000, 500)  # new_size_without_distortion
-         """
-
-         image_width, image_height = image.size
-         image_size = (image_width, image_height)
-
-         # If target_size requires upscaling, we might want to limit the upscaling to max_upscaling_size
-         if max_upscaling_size is not None:
-             new_target_width = min(max(image_width, max_upscaling_size), target_size[0])
-             new_target_height = min(max(image_height, max_upscaling_size), target_size[1])
-             target_size = (new_target_width, new_target_height)
-
-         # resize to target_size while preserving aspect ratio
-         new_size_without_distortion = self.get_max_res_without_distortion(image_size, target_size)
-
-         image = F.resize(
-             image,
-             (new_size_without_distortion[1], new_size_without_distortion[0]),
-             interpolation=self.resample,
-         )
-
-         return image
-
-     def get_best_fit(
-         self,
-         image_size: tuple[int, int],
-         possible_resolutions: torch.Tensor,
-         resize_to_max_canvas: bool = False,
-     ) -> tuple[int, int]:
-         """
-         Determines the best canvas from a list of possible resolutions to resize an image to,
-         without distortion.
-
-         For each possible resolution, calculates the scaling factors for
-         width and height, and selects the smallest one, which is the limiting side.
-         E.g. to match the canvas you can upscale height by 2x, and width by 1.5x,
-         therefore, the maximum upscaling you can do is min(2, 1.5) = 1.5.
-
-         If upscaling is possible (any of the scaling factors is greater than 1),
-         then picks the smallest upscaling factor > 1, unless resize_to_max_canvas is True.
-
-         If upscaling is not possible, then picks the largest scaling factor <= 1, i.e.
-         reduce downscaling as much as possible.
-
-         If there are multiple resolutions with the same max scale, we pick the one with the lowest area,
-         to minimize padding. E.g., the same image can be upscaled to 224x224 and 224x448, but the latter
-         has more padding.
-
-         Args:
-             image_size (Tuple[int, int]): A tuple containing the height and width of the image.
-             possible_resolutions (torch.Tensor): A tensor of shape (N, 2) where each
-                 row represents a possible resolution (height, width).
-             use_max_upscaling (bool): If True, will return the largest upscaling resolution.
-
-         Returns:
-             List[int]: The best resolution [height, width] for the given image.
-
-         Example:
-             >>> image_size = (200, 300)
-             >>> possible_resolutions = torch.tensor([[224, 672],
-             ...                                      [672, 224],
-             ...                                      [224, 448],
-             ...                                      [448, 224],
-             ...                                      [224, 224]])
-             >>> _get_smallest_upscaling_possibility(image_size, possible_resolutions)
-             [224, 448]
-
-             We have:
-             scale_w = tensor([2.2400, 0.7467, 1.4933, 0.7467, 0.7467])
-             scale_h = tensor([1.1200, 3.3600, 1.1200, 2.2400, 1.1200])
-             scales = tensor([1.1200, 0.7467, 1.1200, 0.7467, 0.7467])
-             Two of the scales are > 1:
-             upscaling_possible = tensor([1.1200, 1.1200])
-             smallest_rescale = tensor(1.1200)
-             So we pick the resolution with the smallest area:
-             areas = tensor([150528, 100352])  # [224, 672], [224, 448]
-             optimal_canvas = tensor([224, 448])
-         """
-
-         original_width, original_height = image_size
-
-         # get all possible resolutions heights/widths
-         target_widths, target_heights = (
-             possible_resolutions[:, 0],
-             possible_resolutions[:, 1],
-         )
-
-         # get scaling factors to resize the image without distortion
-         scale_w = target_widths / original_width
-         scale_h = target_heights / original_height
-
-         # get the min scale between width and height (limiting side -> no distortion)
-         scales = torch.where(scale_w > scale_h, scale_h, scale_w)
-
-         # filter only scales that allow upscaling
-         upscaling_options = scales[scales >= 1]
-         if len(upscaling_options) > 0:
-             if resize_to_max_canvas:
-                 selected_scale = torch.max(upscaling_options)
-             else:
-                 selected_scale = torch.min(upscaling_options)
-         else:
-             # no upscaling possible,
-             # get the minimum downscaling (max scale for scales<1)
-             downscaling_options = scales[scales < 1]
-             selected_scale = torch.max(downscaling_options)
-
-         # get all resolutions that support this scaling factor,
-         # e.g. you can upscale to 224x224, 224x448, 224x672 without distortion
-         chosen_canvas = possible_resolutions[scales == selected_scale]
-
-         # if there are multiple resolutions,
-         # get the one with minimum area to reduce padding
-         if len(chosen_canvas) > 1:
-             areas = chosen_canvas[:, 0] * chosen_canvas[:, 1]
-             optimal_idx = torch.argmin(areas)
-             optimal_canvas = chosen_canvas[optimal_idx]
-         else:
-             optimal_canvas = chosen_canvas[0]
-
-         return tuple(optimal_canvas.tolist())
-
-     def __call__(
-         self,
-         image: Image.Image,
-         max_num_chunks: int,
-         normalize_img: bool = True,
-         resize_to_max_canvas: bool = False,
-     ) -> tuple[Any, Any]:
-         """
-         Args:
-             image (PIL.Image): Image to be resized.
-             max_num_chunks (int): Maximum number of chunks to split the image into.
-             normalize_img (bool): Whether to normalize the image.
-             resize_to_max_canvas (bool): Whether to resize the image to the maximum canvas size.
-                 If True, picks the canvas that allows the largest resizing without distortion.
-                 If False, downsample as little as possible, including no resizing at all,
-                 but never upsample, unless the image is smaller than the patch size.
-         """
-         assert max_num_chunks > 0
-         assert isinstance(image, Image.Image), type(image)
-         w, h = image.size
-
-         possible_resolutions_list = self.find_supported_resolutions(max_num_chunks=max_num_chunks, patch_size=self.size)
-         possible_resolutions_tensor = torch.tensor(possible_resolutions_list)
-
-         best_resolution = self.get_best_fit(
-             image_size=(w, h),
-             possible_resolutions=possible_resolutions_tensor,
-             resize_to_max_canvas=resize_to_max_canvas,
-         )
-
-         max_upscaling_size = None if resize_to_max_canvas else self.size
-         image = self.resize_without_distortion(image, best_resolution, max_upscaling_size)
-         image = self._pad(image, best_resolution)
-
-         image = self.to_tensor(image)
-
-         if normalize_img:
-             image = self.normalize(image)
-
-         ratio_w, ratio_h = (
-             best_resolution[0] // self.size,
-             best_resolution[1] // self.size,
-         )
-
-         image = self._split(image, ratio_w, ratio_h)  # type: ignore
-
-         ar = (ratio_h, ratio_w)
-         return image, ar
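
As context for the deletion above: the heart of the removed VariableSizeImageTransform is the canvas-selection math in find_supported_resolutions and get_best_fit. The following standalone sketch is not part of either package version; the names supported_canvases and best_fit are illustrative, and it assumes a (width, height) ordering throughout. It reproduces the selection logic without the torch and PIL dependencies so the worked example from the deleted docstring can be checked directly.

# Illustrative sketch only: mirrors the canvas-selection math of the deleted
# VariableSizeImageTransform, reimplemented in plain Python for inspection.

def supported_canvases(max_num_chunks: int, patch_size: int) -> list[tuple[int, int]]:
    # Every (width, height) canvas expressible as a gw x gh grid of square
    # patches with gw * gh <= max_num_chunks (cf. find_supported_resolutions).
    grids = {
        (i, n // i)
        for n in range(1, max_num_chunks + 1)
        for i in range(1, n + 1)
        if n % i == 0
    }
    return [(gw * patch_size, gh * patch_size) for gw, gh in sorted(grids)]

def best_fit(
    image_size: tuple[int, int],
    canvases: list[tuple[int, int]],
    resize_to_max_canvas: bool = False,
) -> tuple[int, int]:
    # The limiting-side scale min(cw/w, ch/h) is the largest distortion-free
    # resize into each canvas (cf. get_best_fit).
    w, h = image_size
    scales = [min(cw / w, ch / h) for cw, ch in canvases]
    upscales = [s for s in scales if s >= 1]
    if upscales:
        selected = max(upscales) if resize_to_max_canvas else min(upscales)
    else:
        selected = max(scales)  # no upscale possible: downscale as little as we can
    # Among canvases sharing the selected scale, take the smallest area
    # so the subsequent zero-padding step is minimized.
    candidates = [c for c, s in zip(canvases, scales) if s == selected]
    return min(candidates, key=lambda c: c[0] * c[1])

if __name__ == "__main__":
    # The deleted docstring's worked example: a 300x800 image with patch_size=224
    # and max_num_chunks=8 lands on a 2x4 patch grid, i.e. a 448x896 canvas.
    print(best_fit((300, 800), supported_canvases(8, 224)))  # -> (448, 896)

Taking the elementwise minimum of the width and height scale factors guarantees the resize never distorts the aspect ratio, and breaking ties by smallest canvas area keeps the later zero-padding step as small as possible, which is the behavior the removed class documented.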