docling-ibm-models 3.3.0__py3-none-any.whl → 3.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling_ibm_models/__init__.py +0 -0
- docling_ibm_models/code_formula_model/__init__.py +0 -0
- docling_ibm_models/code_formula_model/code_formula_predictor.py +69 -10
- docling_ibm_models/code_formula_model/models/__init__.py +0 -0
- docling_ibm_models/code_formula_model/models/sam_opt.py +7 -6
- docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py +4 -1
- docling_ibm_models/document_figure_classifier_model/__init__.py +0 -0
- docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py +6 -7
- docling_ibm_models/layoutmodel/__init__.py +0 -0
- docling_ibm_models/py.typed +0 -0
- docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py +3 -3
- docling_ibm_models/tableformer/otsl.py +1 -1
- docling_ibm_models/tableformer/utils/mem_monitor.py +3 -2
- {docling_ibm_models-3.3.0.dist-info → docling_ibm_models-3.3.2.dist-info}/METADATA +2 -2
- {docling_ibm_models-3.3.0.dist-info → docling_ibm_models-3.3.2.dist-info}/RECORD +17 -12
- docling_ibm_models/tableformer/utils/torch_utils.py +0 -216
- {docling_ibm_models-3.3.0.dist-info → docling_ibm_models-3.3.2.dist-info}/LICENSE +0 -0
- {docling_ibm_models-3.3.0.dist-info → docling_ibm_models-3.3.2.dist-info}/WHEEL +0 -0
docling_ibm_models/__init__.py: File without changes
docling_ibm_models/code_formula_model/__init__.py: File without changes
docling_ibm_models/code_formula_model/code_formula_predictor.py

@@ -3,12 +3,12 @@
 # SPDX-License-Identifier: MIT
 #
 import logging
-from typing import List, Union
+from typing import List, Optional, Union

 import numpy as np
 import torch
 from PIL import Image
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, StoppingCriteria, StoppingCriteriaList

 from docling_ibm_models.code_formula_model.models.sam_opt import SamOPTForCausalLM
 from docling_ibm_models.code_formula_model.models.sam_opt_image_processor import (
@@ -18,6 +18,22 @@ from docling_ibm_models.code_formula_model.models.sam_opt_image_processor import
 _log = logging.getLogger(__name__)


+class StopOnString(StoppingCriteria):
+    def __init__(self, tokenizer, stop_string):
+        self.stop_token_ids = tokenizer.encode(stop_string, add_special_tokens=False)
+
+    def __call__(self, input_ids, scores, **kwargs):
+        for sequence in input_ids:
+            sequence_list = sequence.tolist()
+            for i in range(len(sequence_list) - len(self.stop_token_ids) + 1):
+                if (
+                    sequence_list[i : i + len(self.stop_token_ids)]
+                    == self.stop_token_ids
+                ):
+                    return True
+        return False
+
+
 class CodeFormulaPredictor:
     """
     Code and Formula Predictor using a multi-modal vision-language model.
@@ -127,12 +143,37 @@ class CodeFormulaPredictor:

         return prompt

+    def _strip(self, text: str):
+        """
+        Removes any occurrences of the substrings in remove_list from the end of text.
+
+        Parameters
+        ----------
+        text : str
+            The original string.
+
+        Returns
+        -------
+        str
+            The trimmed string.
+        """
+        remove_list = [r"\quad", r"\\", r"\,", " c c c c", " l l l l l"]
+        changed = True
+        while changed:
+            changed = False
+            for substr in remove_list:
+                if text.endswith(substr):
+                    text = text[: -len(substr)]
+                    changed = True
+
+        return text.strip()
+
     @torch.inference_mode()
     def predict(
         self,
         images: List[Union[Image.Image, np.ndarray]],
         labels: List[str],
-        temperature: float = 0.
+        temperature: Optional[float] = 0.0,
     ) -> List[str]:
         """
         Predicts the textual representation of input images (code or LaTeX).
@@ -143,8 +184,8 @@ class CodeFormulaPredictor:
             List of images to be processed, provided as PIL Image objects or numpy arrays.
         labels : List[str]
             List of labels indicating the type of each image ('code' or 'formula').
-        temperature : float
-            Sampling temperature for generation, by default set to 0.
+        temperature : Optional[float]
+            Sampling temperature for generation, by default set to 0.0.

         Returns
         -------
@@ -159,7 +200,11 @@ class CodeFormulaPredictor:
         Excpetion
             In case the temperature is an invalid number.
         """
-        if (
+        if (
+            temperature is None
+            or not (isinstance(temperature, float) or isinstance(temperature, int))
+            or temperature < 0
+        ):
             raise Exception("Temperature must be a number greater or equal to 0.")

         do_sample = True
@@ -181,11 +226,10 @@ class CodeFormulaPredictor:
             else:
                 raise TypeError("Not supported input image format")
             images_tmp.append(image)
-        images = images_tmp

-        images_tensor = torch.stack(
-            self.
-        )
+        images_tensor = torch.stack(
+            [self._image_processor(img) for img in images_tmp]
+        ).to(self._device)

         prompts = [self._get_prompt(label) for label in labels]

@@ -195,6 +239,16 @@ class CodeFormulaPredictor:
         prompt_ids = tokenized["input_ids"]
         attention_mask = tokenized["attention_mask"]

+        stopping_criteria = StoppingCriteriaList(
+            [
+                StopOnString(self._tokenizer, r" \quad \quad \quad \quad"),
+                StopOnString(self._tokenizer, r" \\ \\ \\ \\"),
+                StopOnString(self._tokenizer, r" \, \, \, \,"),
+                StopOnString(self._tokenizer, r" c c c c c c c c c c c c c c c c"),
+                StopOnString(self._tokenizer, r" l l l l l l l l l l l l l l l l l"),
+            ]
+        )
+
         if self._device == "cpu":
             output_ids_list = self._model.generate(
                 input_ids=prompt_ids,
@@ -204,6 +258,8 @@ class CodeFormulaPredictor:
                 temperature=temperature,
                 max_new_tokens=4096 - prompt_ids.shape[1],
                 use_cache=True,
+                no_repeat_ngram_size=200,
+                stopping_criteria=stopping_criteria,
             )
         else:
             with torch.autocast(device_type=self._device, dtype=torch.bfloat16):
@@ -214,10 +270,13 @@ class CodeFormulaPredictor:
                     temperature=temperature,
                     max_new_tokens=4096 - prompt_ids.shape[1],
                     use_cache=True,
+                    no_repeat_ngram_size=200,
+                    stopping_criteria=stopping_criteria,
                 )

         outputs = self._tokenizer.batch_decode(
             output_ids_list[:, prompt_ids.shape[1] :], skip_special_tokens=True
         )
+        outputs = [self._strip(output) for output in outputs]

         return outputs
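Taken together, the no_repeat_ngram_size setting, the StopOnString criteria, and the _strip post-processing added above look like guards against the decoder looping on filler tokens (\quad, \\, \,, long "c"/"l" column runs). Below is a minimal, self-contained sketch, not part of the package, of how a custom StoppingCriteria of this kind plugs into transformers' generate; the checkpoint name and stop string are placeholders.

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
)


class StopOnString(StoppingCriteria):
    # Same idea as the class added in the diff: stop once the token ids of a
    # given string appear anywhere in the generated sequence.
    def __init__(self, tokenizer, stop_string):
        self.stop_token_ids = tokenizer.encode(stop_string, add_special_tokens=False)

    def __call__(self, input_ids, scores, **kwargs):
        for sequence in input_ids:
            seq = sequence.tolist()
            for i in range(len(seq) - len(self.stop_token_ids) + 1):
                if seq[i : i + len(self.stop_token_ids)] == self.stop_token_ids:
                    return True
        return False


tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")  # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")

criteria = StoppingCriteriaList(
    [StopOnString(tokenizer, r" \quad \quad \quad \quad")]  # illustrative stop string
)
inputs = tokenizer("x = ", return_tensors="pt")

with torch.inference_mode():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=64,
        no_repeat_ngram_size=200,  # same anti-repetition knob the diff adds
        stopping_criteria=criteria,
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Generation halts as soon as the encoded stop string shows up anywhere in the output, on top of the usual max_new_tokens limit.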
docling_ibm_models/code_formula_model/models/__init__.py: File without changes
docling_ibm_models/code_formula_model/models/sam_opt.py

@@ -67,14 +67,14 @@ class SamOPTModel(OPTModel):

     def forward(
         self,
-        input_ids: torch.LongTensor
+        input_ids: torch.LongTensor,
         attention_mask: Optional[torch.Tensor] = None,
         past_key_values: Optional[List[torch.FloatTensor]] = None,
         inputs_embeds: Optional[torch.FloatTensor] = None,
         use_cache: Optional[bool] = None,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
-        images: torch.FloatTensor = None,
+        images: Optional[torch.FloatTensor] = None,
         return_dict: Optional[bool] = None,
     ) -> Union[Tuple, BaseModelOutputWithPast]:

@@ -86,6 +86,7 @@ class SamOPTModel(OPTModel):

         if input_ids.shape[1] != 1 or self.training:
             with torch.set_grad_enabled(self.training):
+                assert vision_tower is not None
                 image_features = vision_tower(images)
                 image_features = image_features.flatten(2).permute(0, 2, 1)
                 image_features = self.mm_projector(image_features)
@@ -94,9 +95,9 @@ class SamOPTModel(OPTModel):
             for cur_input_ids, cur_input_embeds, cur_image_features in zip(
                 input_ids, inputs_embeds, image_features
             ):
-                image_start_token_position =
-                    cur_input_ids == im_start_token
-                )
+                image_start_token_position = int(
+                    torch.where(cur_input_ids == im_start_token)[0].item()
+                )  # cast to int for mypy

                 cur_image_features = cur_image_features.to(
                     device=cur_input_embeds.device
@@ -115,7 +116,7 @@ class SamOPTModel(OPTModel):

                 new_input_embeds.append(cur_input_embeds)

-            inputs_embeds = torch.stack(new_input_embeds, dim=0)
+            inputs_embeds = torch.stack(new_input_embeds, dim=0)  # type: ignore

         return super(SamOPTModel, self).forward(
             input_ids=None,
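The hunk above wraps the token-position lookup in int(...) for mypy and asserts that the vision tower exists. As context, here is a toy, self-contained sketch (shapes, token ids, and tensors are invented; this is not the package's code) of the splice pattern the surrounding code implements: find the image-start token and substitute projected image features for the placeholder embeddings that follow it.

import torch

# Hypothetical values purely for illustration.
im_start_token = 7
cur_input_ids = torch.tensor([1, 5, 7, 0, 0, 0, 8, 2])  # 0s stand in for image placeholders
cur_input_embeds = torch.randn(8, 16)                    # (seq_len, hidden_dim)
cur_image_features = torch.randn(3, 16)                  # (num_patches, hidden_dim)

# Locate the image-start token; .item() gives a Python number, int() keeps mypy happy,
# mirroring the change in the diff above.
image_start_token_position = int(torch.where(cur_input_ids == im_start_token)[0].item())

# Replace the placeholder embeddings right after the start token with image features.
num_patches = cur_image_features.shape[0]
cur_input_embeds = torch.cat(
    (
        cur_input_embeds[: image_start_token_position + 1],
        cur_image_features,
        cur_input_embeds[image_start_token_position + 1 + num_patches :],
    ),
    dim=0,
)
print(cur_input_embeds.shape)  # torch.Size([8, 16]): same length, placeholders swapped out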
docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py

@@ -28,4 +28,7 @@ class SamOptImageProcessor(ImageProcessingMixin):
         return image


-AutoImageProcessor.register(
+AutoImageProcessor.register(
+    config_class="SamOptImageProcessor",
+    slow_image_processor_class=SamOptImageProcessor,
+)
docling_ibm_models/document_figure_classifier_model/__init__.py: File without changes
docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py

@@ -147,24 +147,23 @@ class DocumentFigureClassifierPredictor:

         The predictions for each image are sorted in descending order of confidence.
         """
-
+        rgb_images = []
         for image in images:
             if isinstance(image, Image.Image):
-
+                rgb_images.append(image.convert("RGB"))
             elif isinstance(image, np.ndarray):
-
+                rgb_images.append(Image.fromarray(image).convert("RGB"))
             else:
                 raise TypeError(
                     "Supported input formats are PIL.Image.Image or numpy.ndarray."
                 )
-        images = processed_images

         # (batch_size, 3, 224, 224)
-
-
+        processed_images = [self._image_processor(image) for image in rgb_images]
+        torch_images = torch.stack(processed_images).to(self._device)

         with torch.no_grad():
-            logits = self._model(
+            logits = self._model(torch_images).logits  # (batch_size, num_classes)
             probs_batch = logits.softmax(dim=1)  # (batch_size, num_classes)
             probs_batch = probs_batch.cpu().numpy().tolist()

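The rewritten preprocessing above normalizes PIL and numpy inputs to RGB before batching. Below is a small runnable sketch of the same path; the torchvision transform is a stand-in for the package's own _image_processor, which the diff does not show.

import numpy as np
import torch
from PIL import Image
from torchvision import transforms

# Stand-in preprocessor (assumed here to produce 224x224 tensors).
image_processor = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

inputs = [
    Image.new("RGB", (640, 480), color="white"),
    np.zeros((480, 640, 3), dtype=np.uint8),
]

rgb_images = []
for image in inputs:
    if isinstance(image, Image.Image):
        rgb_images.append(image.convert("RGB"))
    elif isinstance(image, np.ndarray):
        rgb_images.append(Image.fromarray(image).convert("RGB"))
    else:
        raise TypeError("Supported input formats are PIL.Image.Image or numpy.ndarray.")

processed_images = [image_processor(image) for image in rgb_images]
torch_images = torch.stack(processed_images)  # (batch_size, 3, 224, 224)
print(torch_images.shape)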
docling_ibm_models/layoutmodel/__init__.py: File without changes
docling_ibm_models/py.typed: File without changes
docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py

@@ -36,7 +36,7 @@ class PositionalEncoding(nn.Module):


 class TMTransformerDecoder(nn.TransformerDecoder):
-    def forward(
+    def forward(  # type: ignore
         self,
         tgt: Tensor,
         memory: Optional[Tensor] = None,
@@ -69,11 +69,11 @@ class TMTransformerDecoder(nn.TransformerDecoder):
         else:
             out_cache = torch.stack(tag_cache, dim=0)

-        return output, out_cache
+        return output, out_cache  # type: ignore


 class TMTransformerDecoderLayer(nn.TransformerDecoderLayer):
-    def forward(
+    def forward(  # type: ignore
         self,
         tgt: Tensor,
         memory: Optional[Tensor] = None,
docling_ibm_models/tableformer/otsl.py

@@ -11,7 +11,7 @@ import docling_ibm_models.tableformer.settings as s
 LOG_LEVEL = logging.INFO
 # LOG_LEVEL = logging.DEBUG
 logger = s.get_custom_logger("consolidate", LOG_LEVEL)
-png_files = {}  # Evaluation files
+# png_files = {}  # Evaluation files
 total_pics = 0


docling_ibm_models/tableformer/utils/mem_monitor.py

@@ -5,6 +5,7 @@
 import os
 import platform
 import re
+from typing import Dict, Union


 class MemMonitor:
@@ -112,7 +113,7 @@ class MemMonitor:
             regex_str = r"({}:)(\s+)(\d*)(.*)".format(mem_field)
             self._status_regex[mem_field] = re.compile(regex_str)

-    def get_memory_full(self) ->
+    def get_memory_full(self) -> Union[Dict, int]:
         r"""
         - Parse /proc/<pid>status to get all memory info.
         - The method returns a dict with the fields self._status_fields
@@ -140,7 +141,7 @@ class MemMonitor:

         return memory

-    def get_memory(self) ->
+    def get_memory(self) -> Union[Dict, int]:
         r"""
         - Parse /proc/<pid>statm to get the most important memory fields
         - This is a fast implementation.
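The new Union[Dict, int] annotations say these methods return either a dict of memory fields or a bare int. As a rough illustration only (my own sketch, not the package's implementation, and the int-on-failure convention is an assumption), parsing /proc/<pid>/statm looks roughly like this:

import os
from typing import Dict, Union


def get_memory(pid: int = os.getpid()) -> Union[Dict, int]:
    """Parse /proc/<pid>/statm and return memory fields in kB, or an int on failure."""
    try:
        with open(f"/proc/{pid}/statm") as fh:
            fields = fh.read().split()
    except OSError:
        return -1  # assumed failure convention: /proc is unavailable (e.g. macOS, Windows)
    page_kb = os.sysconf("SC_PAGE_SIZE") // 1024
    return {
        "size": int(fields[0]) * page_kb,      # total program size
        "resident": int(fields[1]) * page_kb,  # resident set size
        "shared": int(fields[2]) * page_kb,    # shared pages
    }


print(get_memory())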
{docling_ibm_models-3.3.0.dist-info → docling_ibm_models-3.3.2.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: docling-ibm-models
-Version: 3.3.0
+Version: 3.3.2
 Summary: This package contains the AI models used by the Docling PDF conversion package
 License: MIT
 Keywords: docling,convert,document,pdf,layout model,segmentation,table structure,table former
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Dist: Pillow (>=10.0.0,<
+Requires-Dist: Pillow (>=10.0.0,<12.0.0)
 Requires-Dist: huggingface_hub (>=0.23,<1)
 Requires-Dist: jsonlines (>=3.1.0,<4.0.0)
 Requires-Dist: numpy (>=1.24.4,<2.0.0) ; sys_platform == "darwin" and platform_machine == "x86_64"
{docling_ibm_models-3.3.0.dist-info → docling_ibm_models-3.3.2.dist-info}/RECORD

@@ -1,9 +1,15 @@
-docling_ibm_models/
+docling_ibm_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling_ibm_models/code_formula_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling_ibm_models/code_formula_model/code_formula_predictor.py,sha256=yX0Cd1o-dkJtju5WKtSk7L2b0jc6P_KgxrsZN5_SBb0,9445
+docling_ibm_models/code_formula_model/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/code_formula_model/models/sam.py,sha256=6MXf1ae_wRWJ4b1luISWXBRKyoQie7YbpY-qwq1OJJA,17841
-docling_ibm_models/code_formula_model/models/sam_opt.py,sha256=
-docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py,sha256=
-docling_ibm_models/document_figure_classifier_model/
+docling_ibm_models/code_formula_model/models/sam_opt.py,sha256=pvMhvbjBz_8NFkn0PDcd6I_m9Py2dZsKGCqQSbnFmP8,8429
+docling_ibm_models/code_formula_model/models/sam_opt_image_processor.py,sha256=6uAU4KSr4pVTesQZHgvbZxSjvJZ3JfRSdetuoos3DeE,921
+docling_ibm_models/document_figure_classifier_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docling_ibm_models/document_figure_classifier_model/document_figure_classifier_predictor.py,sha256=vRIp02rs9Xa4n1K-M7AYO_tFj4S7WQCQmL9i006T9Qk,5795
+docling_ibm_models/layoutmodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/layoutmodel/layout_predictor.py,sha256=ArVgs7FBOiu23TC-JoybcaTp7F7a4BgYC8uRVxTgx4E,5681
+docling_ibm_models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/common.py,sha256=2zgGZBFf4fXytEaXrZR2NU6FWdX2kxO0DHlGZmuvpNQ,3230
 docling_ibm_models/tableformer/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,15 +25,14 @@ docling_ibm_models/tableformer/models/table04_rs/__init__.py,sha256=47DEQpj8HBSa
 docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py,sha256=JV9rFh9caT3qnwWlZ0CZpw5aiiNzyTbfVp6H6JMxS0Q,6117
 docling_ibm_models/tableformer/models/table04_rs/encoder04_rs.py,sha256=iExmqJ0Pn0lJU3nWb_x8abTn42GctMqE55_YA2ppgvc,1975
 docling_ibm_models/tableformer/models/table04_rs/tablemodel04_rs.py,sha256=Mv17JGgO12hIt8jrnflWLgOimdFYkBLuV0rxaGawBpk,12266
-docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=
-docling_ibm_models/tableformer/otsl.py,sha256=
+docling_ibm_models/tableformer/models/table04_rs/transformer_rs.py,sha256=2i5qqVijyic2VeMI0d1-9gVg2vTbxfi9Ciyo-r41iOY,6464
+docling_ibm_models/tableformer/otsl.py,sha256=DxEwJVC_IqomZs_wUzj-TWjUUgQuEVcm8MXru7VYGkA,21391
 docling_ibm_models/tableformer/settings.py,sha256=UlpsP0cpJZR2Uk48lgysYy0om3fr8Xt3z1xzvlTw5j4,3067
 docling_ibm_models/tableformer/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docling_ibm_models/tableformer/utils/app_profiler.py,sha256=Pb7o1zcikKXh7ninaNt4_nVa1xuUrogZxbTr6U6jkEE,8392
-docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=
-docling_ibm_models/tableformer/utils/torch_utils.py,sha256=uN0rK9mSXy1ewBnBnILrWebJhhVU4N-XJZBqNiLJwlQ,8893
+docling_ibm_models/tableformer/utils/mem_monitor.py,sha256=NFZUnrfLThXNZQrm3ESRmPSJmPF2J1z3E2v_72O4dRw,6408
 docling_ibm_models/tableformer/utils/utils.py,sha256=8Bxf1rEn977lFbY9NX0r5xh9PvxIRipQZX_EZW92XfA,10980
-docling_ibm_models-3.3.
-docling_ibm_models-3.3.
-docling_ibm_models-3.3.
-docling_ibm_models-3.3.
+docling_ibm_models-3.3.2.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
+docling_ibm_models-3.3.2.dist-info/METADATA,sha256=rHUKawXijJBFGjFKjNl4fRpUFC0ChvURUFOvUqL2t04,7347
+docling_ibm_models-3.3.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+docling_ibm_models-3.3.2.dist-info/RECORD,,
docling_ibm_models/tableformer/utils/torch_utils.py (file removed)

@@ -1,216 +0,0 @@
-#
-# Copyright IBM Corp. 2024 - 2024
-# SPDX-License-Identifier: MIT
-#
-import torch
-
-
-def model_info(model, verbose=False):
-    # Plots a line-by-line description of a PyTorch model
-    n_p = sum(x.numel() for x in model.parameters())  # number parameters
-    n_g = sum(
-        x.numel() for x in model.parameters() if x.requires_grad
-    )  # number gradients
-    if verbose:
-        print(
-            "%5s %40s %9s %12s %20s %10s %10s"
-            % ("layer", "name", "gradient", "parameters", "shape", "mu", "sigma")
-        )
-        for i, (name, p) in enumerate(model.named_parameters()):
-            name = name.replace("module_list.", "")
-            print(
-                "%5g %40s %9s %12g %20s %10.3g %10.3g"
-                % (
-                    i,
-                    name,
-                    p.requires_grad,
-                    p.numel(),
-                    list(p.shape),
-                    p.mean(),
-                    p.std(),
-                )
-            )
-
-    try:  # FLOPS
-        from thop import profile
-
-        macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False)
-        fs = ", %.1f GFLOPS" % (macs / 1e9 * 2)
-    except Exception:
-        fs = ""
-
-    print(
-        "Model Summary: %g layers, %g parameters, %g gradients%s"
-        % (len(list(model.parameters())), n_p, n_g, fs)
-    )
-
-
-# def init_seeds(seed=0):
-#     torch.manual_seed(seed)
-#
-#     # Reduce randomness (may be slower on Tesla GPUs)
-#     # https://pytorch.org/docs/stable/notes/randomness.html
-#     if seed == 0:
-#         cudnn.deterministic = False
-#         cudnn.benchmark = True
-#
-#
-# def select_device(device='', apex=False, batch_size=None):
-#     # device = 'cpu' or '0' or '0,1,2,3'
-#     cpu_request = device.lower() == 'cpu'
-#     if device and not cpu_request:  # if device requested other than 'cpu'
-#         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
-#         # check availablity
-#         assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device
-#
-#     cuda = False if cpu_request else torch.cuda.is_available()
-#     if cuda:
-#         c = 1024 ** 2  # bytes to MB
-#         ng = torch.cuda.device_count()
-#         if ng > 1 and batch_size:  # check that batch_size is compatible with device_count
-#             assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % \
-#                 (batch_size, ng)
-#         x = [torch.cuda.get_device_properties(i) for i in range(ng)]
-#         # apex for mixed precision https://github.com/NVIDIA/apex
-#         s = 'Using CUDA ' + ('Apex ' if apex else '')
-#         for i in range(0, ng):
-#             if i == 1:
-#                 s = ' ' * len(s)
-#             print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
-#                   (s, i, x[i].name, x[i].total_memory / c))
-#     else:
-#         print('Using CPU')
-#
-#     print('')  # skip a line
-#     return torch.device('cuda:0' if cuda else 'cpu')
-#
-#
-# def time_synchronized():
-#     torch.cuda.synchronize() if torch.cuda.is_available() else None
-#     return time.time()
-#
-#
-# def initialize_weights(model):
-#     for m in model.modules():
-#         t = type(m)
-#         if t is nn.Conv2d:
-#             pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
-#         elif t is nn.BatchNorm2d:
-#             m.eps = 1e-4
-#             m.momentum = 0.03
-#         elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
-#             m.inplace = True
-#
-#
-# def find_modules(model, mclass=nn.Conv2d):
-#     # finds layer indices matching module class 'mclass'
-#     return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
-#
-#
-# def fuse_conv_and_bn(conv, bn):
-#     # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
-#     with torch.no_grad():
-#         # init
-#         fusedconv = torch.nn.Conv2d(conv.in_channels,
-#                                     conv.out_channels,
-#                                     kernel_size=conv.kernel_size,
-#                                     stride=conv.stride,
-#                                     padding=conv.padding,
-#                                     bias=True)
-#
-#         # prepare filters
-#         w_conv = conv.weight.clone().view(conv.out_channels, -1)
-#         w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
-#         fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
-#
-#         # prepare spatial bias
-#         if conv.bias is not None:
-#             b_conv = conv.bias
-#         else:
-#             b_conv = torch.zeros(conv.weight.size(0))
-#         b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
-#         fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
-#
-#         return fusedconv
-#
-#
-# def load_classifier(name='resnet101', n=2):
-#     # Loads a pretrained model reshaped to n-class output
-#     import pretrainedmodels  # https://github.com/Cadene/pretrained-models.pytorch#torchvision
-#     model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet')
-#
-#     # Display model properties
-#     for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean',
-#               'model.std']:
-#         print(x + ' =', eval(x))
-#
-#     # Reshape output to n classes
-#     filters = model.last_linear.weight.shape[1]
-#     model.last_linear.bias = torch.nn.Parameter(torch.zeros(n))
-#     model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters))
-#     model.last_linear.out_features = n
-#     return model
-#
-#
-# def scale_img(img, ratio=1.0, same_shape=True):  # img(16,3,256,416), r=ratio
-#     # scales img(bs,3,y,x) by ratio
-#     h, w = img.shape[2:]
-#     s = (int(h * ratio), int(w * ratio))  # new size
-#     img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
-#     if not same_shape:  # pad/crop img
-#         gs = 64  # (pixels) grid size
-#         h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
-#     return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
-#
-#
-# class ModelEMA:
-#     """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
-#     Keep a moving average of everything in the model state_dict (parameters and buffers).
-#     This is intended to allow functionality like
-#     https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
-#     A smoothed version of the weights is necessary for some training schemes to perform well.
-#     E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use
-#     RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA
-#     smoothing of weights to match results. Pay attention to the decay constant you are using
-#     relative to your update count per epoch.
-#     To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but
-#     disable validation of the EMA weights. Validation will have to be done manually in a separate
-#     process, or after the training stops converging.
-#     This class is sensitive where it is initialized in the sequence of model init,
-#     GPU assignment and distributed training wrappers.
-#     I've tested with the sequence in my own train.py for torch.DataParallel, apex.DDP, and
-#     single-GPU.
-#     """
-#
-#     def __init__(self, model, decay=0.9999, device=''):
-#         # make a copy of the model for accumulating moving average of weights
-#         self.ema = deepcopy(model)
-#         self.ema.eval()
-#         self.updates = 0  # number of EMA updates
-#         # decay exponential ramp (to help early epochs)
-#         self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
-#         self.device = device  # perform ema on different device from model if set
-#         if device:
-#             self.ema.to(device=device)
-#         for p in self.ema.parameters():
-#             p.requires_grad_(False)
-#
-#     def update(self, model):
-#         self.updates += 1
-#         d = self.decay(self.updates)
-#         with torch.no_grad():
-#             if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel):
-#                 msd, esd = model.module.state_dict(), self.ema.module.state_dict()
-#             else:
-#                 msd, esd = model.state_dict(), self.ema.state_dict()
-#
-#             for k, v in esd.items():
-#                 if v.dtype.is_floating_point:
-#                     v *= d
-#                     v += (1. - d) * msd[k].detach()
-#
-#     def update_attr(self, model):
-#         # Assign attributes (which may change during training)
-#         for k in model.__dict__.keys():
-#             if not k.startswith('_'):
-#                 setattr(self.ema, k, getattr(model, k))
{docling_ibm_models-3.3.0.dist-info → docling_ibm_models-3.3.2.dist-info}/LICENSE: File without changes
{docling_ibm_models-3.3.0.dist-info → docling_ibm_models-3.3.2.dist-info}/WHEEL: File without changes