sciveo 0.1.39__tar.gz → 0.1.41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. {sciveo-0.1.39 → sciveo-0.1.41}/PKG-INFO +1 -1
  2. sciveo-0.1.41/sciveo/ml/images/segmentation.py +304 -0
  3. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/nlp/embeddings.py +22 -0
  4. sciveo-0.1.41/sciveo/ml/nlp/tokenizers/bpe.py +76 -0
  5. sciveo-0.1.41/sciveo/tools/aws/__init__.py +0 -0
  6. sciveo-0.1.41/sciveo/version.py +2 -0
  7. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo.egg-info/PKG-INFO +1 -1
  8. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo.egg-info/SOURCES.txt +5 -1
  9. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo.egg-info/requires.txt +2 -0
  10. {sciveo-0.1.39 → sciveo-0.1.41}/setup.py +1 -1
  11. sciveo-0.1.41/test/test_tokenizers.py +34 -0
  12. sciveo-0.1.39/sciveo/version.py +0 -2
  13. {sciveo-0.1.39 → sciveo-0.1.41}/README.md +0 -0
  14. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/__init__.py +0 -0
  15. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/api/__init__.py +0 -0
  16. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/api/base.py +0 -0
  17. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/api/upload.py +0 -0
  18. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/cli.py +0 -0
  19. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/common/__init__.py +0 -0
  20. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/common/configuration.py +0 -0
  21. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/common/model.py +0 -0
  22. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/common/optimizers.py +0 -0
  23. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/common/sampling.py +0 -0
  24. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/content/__init__.py +0 -0
  25. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/content/dataset.py +0 -0
  26. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/content/experiment.py +0 -0
  27. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/content/project.py +0 -0
  28. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/content/runner.py +0 -0
  29. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/__init__.py +0 -0
  30. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/__init__.py +0 -0
  31. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/base.py +0 -0
  32. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/encoders/__init__.py +0 -0
  33. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/encoders/base.py +0 -0
  34. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/encoders/normalizer.py +0 -0
  35. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/nlp/__init__.py +0 -0
  36. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/nlp/search.py +0 -0
  37. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/time_series/__init__.py +0 -0
  38. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/time_series/dataset.py +0 -0
  39. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/time_series/predictor.py +0 -0
  40. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/time_series/trainer.py +0 -0
  41. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/ml/time_series/window_generator.py +0 -0
  42. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/__init__.py +0 -0
  43. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/base.py +0 -0
  44. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/job_daemon.py +0 -0
  45. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/layouts/__init__.py +0 -0
  46. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/layouts/base.py +0 -0
  47. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/pipeline.py +0 -0
  48. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/postprocessors/__init__.py +0 -0
  49. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/postprocessors/base.py +0 -0
  50. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/postprocessors/default.py +0 -0
  51. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/__init__.py +0 -0
  52. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/audio/__init__.py +0 -0
  53. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/audio/audio.py +0 -0
  54. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/audio/audio_extractor_process.py +0 -0
  55. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/aws.py +0 -0
  56. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/base.py +0 -0
  57. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/file/__init__.py +0 -0
  58. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/file/archive.py +0 -0
  59. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/__init__.py +0 -0
  60. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/album.py +0 -0
  61. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/album_in_image.py +0 -0
  62. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/depth_esimation.py +0 -0
  63. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/embeddings.py +0 -0
  64. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/filters.py +0 -0
  65. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/generators.py +0 -0
  66. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/histogram.py +0 -0
  67. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/mask.py +0 -0
  68. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/object_detection.py +0 -0
  69. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/resize.py +0 -0
  70. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/segmentation.py +0 -0
  71. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/image/watermark.py +0 -0
  72. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/media_info.py +0 -0
  73. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/nlp/__init__.py +0 -0
  74. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/nlp/address.py +0 -0
  75. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/qr.py +0 -0
  76. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/sci/__init__.py +0 -0
  77. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/sci/base.py +0 -0
  78. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/sci/dataset.py +0 -0
  79. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/sci/time_series/__init__.py +0 -0
  80. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/sci/time_series/predictor.py +0 -0
  81. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/sci/time_series/trainer.py +0 -0
  82. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/tpu_base.py +0 -0
  83. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/video/__init__.py +0 -0
  84. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/video/generators.py +0 -0
  85. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/video/motion_detection.py +0 -0
  86. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/video/resize.py +0 -0
  87. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/video/video_album.py +0 -0
  88. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/video/video_frames.py +0 -0
  89. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/processors/video/video_resample.py +0 -0
  90. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/queues.py +0 -0
  91. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/server.py +0 -0
  92. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/web/__init__.py +0 -0
  93. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/media/pipelines/web/server.py +0 -0
  94. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/__init__.py +0 -0
  95. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/base.py +0 -0
  96. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/evaluation/__init__.py +0 -0
  97. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/evaluation/object_detection.py +0 -0
  98. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/images/__init__.py +0 -0
  99. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/images/base.py +0 -0
  100. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/images/description.py +0 -0
  101. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/images/embeddings.py +0 -0
  102. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/images/object_detection.py +0 -0
  103. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/images/tools.py +0 -0
  104. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/images/transformers.py +0 -0
  105. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/nlp/__init__.py +0 -0
  106. {sciveo-0.1.39/sciveo/ml/video → sciveo-0.1.41/sciveo/ml/nlp/tokenizers}/__init__.py +0 -0
  107. {sciveo-0.1.39/sciveo/monitoring → sciveo-0.1.41/sciveo/ml/video}/__init__.py +0 -0
  108. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/video/description.py +0 -0
  109. {sciveo-0.1.39/sciveo/network → sciveo-0.1.41/sciveo/monitoring}/__init__.py +0 -0
  110. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/monitoring/monitor.py +0 -0
  111. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/monitoring/start.py +0 -0
  112. {sciveo-0.1.39/sciveo/tools → sciveo-0.1.41/sciveo/network}/__init__.py +0 -0
  113. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/network/camera.py +0 -0
  114. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/network/sniffer.py +0 -0
  115. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/network/tools.py +0 -0
  116. {sciveo-0.1.39/sciveo/tools/aws → sciveo-0.1.41/sciveo/tools}/__init__.py +0 -0
  117. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/array.py +0 -0
  118. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/aws/priority_queue.py +0 -0
  119. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/aws/s3.py +0 -0
  120. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/common.py +0 -0
  121. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/complexity.py +0 -0
  122. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/compress.py +0 -0
  123. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/configuration.py +0 -0
  124. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/crypto.py +0 -0
  125. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/daemon.py +0 -0
  126. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/formating.py +0 -0
  127. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/hardware.py +0 -0
  128. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/http.py +0 -0
  129. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/logger.py +0 -0
  130. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/os.py +0 -0
  131. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/random.py +0 -0
  132. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/remote.py +0 -0
  133. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/simple_counter.py +0 -0
  134. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/synchronized.py +0 -0
  135. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo/tools/timers.py +0 -0
  136. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo.egg-info/dependency_links.txt +0 -0
  137. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo.egg-info/entry_points.txt +0 -0
  138. {sciveo-0.1.39 → sciveo-0.1.41}/sciveo.egg-info/top_level.txt +0 -0
  139. {sciveo-0.1.39 → sciveo-0.1.41}/setup.cfg +0 -0
  140. {sciveo-0.1.39 → sciveo-0.1.41}/test/test_complexity.py +0 -0
  141. {sciveo-0.1.39 → sciveo-0.1.41}/test/test_compress.py +0 -0
  142. {sciveo-0.1.39 → sciveo-0.1.41}/test/test_configuration.py +0 -0
  143. {sciveo-0.1.39 → sciveo-0.1.41}/test/test_crypto.py +0 -0
  144. {sciveo-0.1.39 → sciveo-0.1.41}/test/test_monitoring.py +0 -0
  145. {sciveo-0.1.39 → sciveo-0.1.41}/test/test_runner.py +0 -0
  146. {sciveo-0.1.39 → sciveo-0.1.41}/test/test_sampling.py +0 -0
{sciveo-0.1.39 → sciveo-0.1.41}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sciveo
- Version: 0.1.39
+ Version: 0.1.41
  Description-Content-Type: text/markdown
  Provides-Extra: mon
  Provides-Extra: net
sciveo-0.1.41/sciveo/ml/images/segmentation.py (new)
@@ -0,0 +1,304 @@
+ #
+ # Pavlin Georgiev, Softel Labs
+ #
+ # This is a proprietary file and may not be copied,
+ # distributed, or modified without express permission
+ # from the owner. For licensing inquiries, please
+ # contact pavlin@softel.bg.
+ #
+ # 2024
+ #
+
+ import os
+ import threading
+ import cv2
+ from PIL import Image
+ import json
+ import numpy as np
+
+ import matplotlib
+ import matplotlib.pyplot as plt
+ import matplotlib.patches as patches
+
+ import torch
+ import transformers
+ import torchvision
+
+ from transformers import AutoImageProcessor, MaskFormerForInstanceSegmentation
+
+
+ class ImageTilesSplit:
+   def __init__(self, image, name):
+     self.image = image
+     self.name = name
+
+   def split(self, tile_size=(640, 640)):
+     if self.image.shape[2] > 1:
+       original_image = Image.fromarray(cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB))
+     else:
+       original_image = Image.fromarray(self.image)
+     original_width, original_height = original_image.size
+
+     num_tiles_x = (original_width + tile_size[0] - 1) // tile_size[0]
+     num_tiles_y = (original_height + tile_size[1] - 1) // tile_size[1]
+
+     self.tiles_info = {
+       'image': self.image,
+       'original_image': original_image,
+       'original_size': (original_width, original_height),
+       'tile_size': tile_size,
+       'num_tiles_x': num_tiles_x,
+       'num_tiles_y': num_tiles_y,
+       'tiles': {}
+     }
+
+     for i in range(num_tiles_x):
+       for j in range(num_tiles_y):
+         left = i * tile_size[0]
+         upper = j * tile_size[1]
+         right = min(left + tile_size[0], original_width)
+         lower = min(upper + tile_size[1], original_height)
+
+         tile = original_image.crop((left, upper, right, lower))
+
+         tile_key = f'tile_{i}_{j}'
+         self.tiles_info['tiles'][tile_key] = {
+           'position': (i, j),
+           'box': (left, upper, right, lower),
+           'tile': tile,
+           # 'cv2.tile': cv2.cvtColor(np.array(tile), cv2.COLOR_RGB2BGR)
+           'cv2.tile': np.array(tile)
+         }
+
+     return self.tiles_info
+
+   def join(self, tile_join_key="predicted"):
+     joined = np.zeros((self.tiles_info['original_size'][1], self.tiles_info['original_size'][0], 1), dtype=np.uint8)
+     for tile_key, tile_info in self.tiles_info['tiles'].items():
+       box = tile_info['box']
+       joined[box[1]:box[3], box[0]:box[2], 0] = tile_info[tile_join_key]
+     joined = np.squeeze(joined, axis=-1)
+     self.tiles_info[tile_join_key] = joined
+     return joined
+
+   def get_original_coordinates(self, tile_key, x, y):
+     """
+     Converts coordinates from a tile back to the original image.
+
+     Args:
+       tile_key (str): The key of the tile in the tiles_info dictionary.
+       x (int): The x-coordinate in the tile.
+       y (int): The y-coordinate in the tile.
+
+     Returns:
+       tuple: The coordinates (x_original, y_original) in the original image.
+     """
+     tile_data = self.tiles_info['tiles'][tile_key]
+     left, upper, _, _ = tile_data['box']
+
+     x_original = left + x
+     y_original = upper + y
+
+     return (x_original, y_original)
+
+   def plot_tiles_with_grid(self):
+     original_width, original_height = self.tiles_info['original_size']
+     tile_width, tile_height = self.tiles_info['tile_size']
+     num_tiles_x = self.tiles_info['num_tiles_x']
+     num_tiles_y = self.tiles_info['num_tiles_y']
+
+     fig, ax = plt.subplots(figsize=(10, 10))
+
+     ax.imshow(self.tiles_info['original_image'])
+
+     for i in range(num_tiles_x + 1):
+       x = i * tile_width
+       ax.axvline(x=x, color='r', linestyle='--', linewidth=1)
+
+     for j in range(num_tiles_y + 1):
+       y = j * tile_height
+       ax.axhline(y=y, color='r', linestyle='--', linewidth=1)
+
+     ax.set_xlim(0, original_width)
+     ax.set_ylim(original_height, 0)
+
+     for i in range(num_tiles_x):
+       for j in range(num_tiles_y):
+         x_center = i * tile_width + tile_width / 2
+         y_center = j * tile_height + tile_height / 2
+         ax.text(x_center, y_center, f'{i},{j}', color=(0,1,0), fontsize=7, ha='center', va='center')
+
+     plt.title(f"{self.name} Grid")
+     plt.xlabel("X")
+     plt.ylabel("Y")
+     plt.show()
+
+   def plot_tiles_individually(self):
+     num_tiles_x = self.tiles_info['num_tiles_x']
+     num_tiles_y = self.tiles_info['num_tiles_y']
+
+     fig, axes = plt.subplots(num_tiles_y, num_tiles_x, figsize=(15, 15))
+
+     if num_tiles_x == 1 and num_tiles_y == 1:
+       axes = [[axes]]
+     elif num_tiles_x == 1:
+       axes = [[ax] for ax in axes]
+     elif num_tiles_y == 1:
+       axes = [axes]
+
+     for i in range(num_tiles_x):
+       for j in range(num_tiles_y):
+         tile_key = f'tile_{i}_{j}'
+         tile = self.tiles_info['tiles'][tile_key]['tile']
+
+         ax = axes[j][i]
+
+         ax.imshow(tile, cmap='gray')
+         ax.set_title(f'{i}.{j}', fontsize=8)
+         ax.axis('off')
+
+     plt.tight_layout()
+     plt.show()
+
+
+ class MaskInstancePredictor:
+   def __init__(self, cache_dir, device="cuda", colors=None):
+     if colors is None:
+       self.colors = [
+         [0, 0, 255],
+         [0, 255, 0],
+         [255, 0, 0],
+         [255, 255, 255]
+       ]
+     else:
+       self.colors = colors
+
+     self.device = device
+     self.cache_dir = cache_dir
+
+     self.processor = AutoImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade")
+     self.model = MaskFormerForInstanceSegmentation.from_pretrained(
+       "facebook/maskformer-swin-base-ade",
+       cache_dir=cache_dir
+     ).to(self.device)
+
+   def relabel_predictions(self, predictions, label_map, new_labels):
+     relabeled = np.full_like(predictions, fill_value=-1)
+
+     for label_id, label_name in label_map.items():
+       if label_name in new_labels:
+         relabeled[predictions == label_id] = new_labels[label_name]
+
+     return relabeled
+
+   def predict_one(self, image):
+     inputs = self.processor(images=image, return_tensors="pt").to(self.device)
+     with torch.no_grad():
+       outputs = self.model(**inputs)
+
+     predicted = self.processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
+     return predicted.to("cpu")
+
+   def plot_mask(self, image, mask, alpha=0.5):
+     color_mask = np.zeros((mask.shape[0], mask.shape[1], 3), dtype=np.uint8)  # height, width, 3
+     for label, color in enumerate(self.colors):
+       color_mask[mask == label, :] = color
+     color_mask = color_mask[..., ::-1]  # Convert to BGR
+
+     masked_image = np.array(image) * (1 - alpha) + color_mask * alpha
+     masked_image = masked_image.astype(np.uint8)
+
+     plt.figure(figsize=(15, 10))
+     plt.imshow(masked_image)
+     plt.show()
+
+
+ class UrbanPredictor:
+   def __init__(self, name, cache_dir, device="cuda"):
+     self.name = name
+     self.device = device
+     self.cache_dir = cache_dir
+
+     self.predictor = MaskInstancePredictor(cache_dir, device=device)
+     self._classes = {"artificial": 0, "natural": 1}
+     self.ade_classes = {
+       0: 'artificial', 1: 'artificial', 2: 'natural', 3: 'artificial', 4: 'natural', 5: 'artificial', 6: 'artificial', 7: 'artificial', 8: 'artificial', 9: 'natural',
+       10: 'artificial', 11: 'artificial', 12: 'artificial', 13: 'natural', 14: 'artificial', 15: 'artificial', 16: 'natural', 17: 'natural', 18: 'artificial', 19: 'artificial',
+       20: 'artificial', 21: 'natural', 22: 'artificial', 23: 'artificial', 24: 'artificial', 25: 'artificial', 26: 'natural', 27: 'artificial', 28: 'artificial', 29: 'natural',
+       30: 'artificial', 31: 'artificial', 32: 'artificial', 33: 'artificial', 34: 'natural', 35: 'artificial', 36: 'artificial', 37: 'artificial', 38: 'artificial', 39: 'artificial',
+       40: 'artificial', 41: 'artificial', 42: 'artificial', 43: 'artificial', 44: 'artificial', 45: 'artificial', 46: 'natural', 47: 'artificial', 48: 'artificial', 49: 'artificial',
+       50: 'artificial', 51: 'artificial', 52: 'artificial', 53: 'artificial', 54: 'artificial', 55: 'artificial', 56: 'artificial', 57: 'artificial', 58: 'artificial', 59: 'artificial',
+       60: 'natural', 61: 'artificial', 62: 'artificial', 63: 'artificial', 64: 'artificial', 65: 'artificial', 66: 'natural', 67: 'artificial', 68: 'natural', 69: 'artificial',
+       70: 'artificial', 71: 'artificial', 72: 'natural', 73: 'artificial', 74: 'artificial', 75: 'artificial', 76: 'artificial', 77: 'artificial', 78: 'artificial', 79: 'artificial',
+       80: 'artificial', 81: 'artificial', 82: 'artificial', 83: 'artificial', 84: 'artificial', 85: 'artificial', 86: 'artificial', 87: 'artificial', 88: 'artificial', 89: 'artificial',
+       90: 'artificial', 91: 'artificial', 92: 'artificial', 93: 'artificial', 94: 'natural', 95: 'artificial', 96: 'artificial', 97: 'artificial', 98: 'artificial', 99: 'artificial',
+       100: 'artificial', 101: 'artificial', 102: 'artificial', 103: 'artificial', 104: 'artificial', 105: 'artificial', 106: 'artificial', 107: 'artificial', 108: 'artificial', 109: 'artificial',
+       110: 'artificial', 111: 'artificial', 112: 'artificial', 113: 'natural', 114: 'artificial', 115: 'artificial', 116: 'artificial', 117: 'artificial', 118: 'artificial', 119: 'artificial',
+       120: 'artificial', 121: 'artificial', 122: 'artificial', 123: 'artificial', 124: 'artificial', 125: 'artificial', 126: 'natural', 127: 'artificial', 128: 'natural', 129: 'artificial',
+       130: 'artificial', 131: 'artificial', 132: 'artificial', 133: 'artificial', 134: 'artificial', 135: 'artificial', 136: 'artificial', 137: 'artificial', 138: 'artificial', 139: 'artificial',
+       140: 'artificial', 141: 'artificial', 142: 'artificial', 143: 'artificial', 144: 'artificial', 145: 'artificial', 146: 'artificial', 147: 'artificial', 148: 'artificial', 149: 'artificial'
+     }
+     # natural_labels = {
+     #   'sky', 'tree', 'grass', 'mountain, mount', 'plant', 'water', 'earth, ground',
+     #   'rock, stone', 'sand', 'flower', 'hill', 'palm, palm tree', 'river', 'sea',
+     #   'field', 'land, ground, soil', 'falls', 'lake', 'animal'
+     # }
+     # self.ade_classes = {
+     #   key: "natural" if value in natural_labels else "artificial"
+     #   for key, value in self.predictor.model.config.id2label.items()
+     # }
+
+   def predict(self, image, w=512):
+     self.tile_split = ImageTilesSplit(image, name=self.name)
+     self.tile_split.split(tile_size=(w, w))
+
+     n = 1
+     l = len(self.tile_split.tiles_info['tiles'])
+     for tile_key, tile_info in self.tile_split.tiles_info['tiles'].items():
+       mask = self.predictor.predict_one(tile_info['tile'])
+       mask = self.predictor.relabel_predictions(mask, self.ade_classes, self._classes)
+       self.tile_split.tiles_info['tiles'][tile_key]['predicted'] = mask
+       if n % 100 == 0:
+         info(f"predict {self.name}", f"{n}/{l}", f"on {self.device}")
+       n += 1
+
+     self.tile_split.join(tile_join_key="predicted")
+
+   def plot(self, alpha=0.5):
+     self.predictor.plot_mask(self.tile_split.tiles_info['image'], self.tile_split.tiles_info['predicted'], alpha=alpha)
+
+   def plot_tile(self, tile_key, alpha=0.5):
+     self.predictor.plot_mask(self.tile_split.tiles_info['tiles'][tile_key]['tile'], self.tile_split.tiles_info['tiles'][tile_key]['predicted'], alpha=alpha)
+
+
+ class ThrPredictor:
+   plot_lock = threading.Lock()
+
+   def __init__(self, name, base_path, w, cache_dir, device):
+     self.image = cv2.imread(os.path.join(base_path, name))
+     self.name = name
+     self.w = w
+     self.cache_dir = cache_dir
+     self.device = device
+
+   def start(self):
+     self.t = threading.Thread(target = self.run)
+     self.t.start()
+
+   def join(self):
+     self.t.join()
+
+   def plot(self, alpha=0.3):
+     info("Plot", self.name)
+     self.predictor.tile_split.plot_tiles_with_grid()
+     self.predictor.plot(alpha=alpha)
+     # self.predictor.plot_tile("tile_11_11", alpha=alpha)
+
+   def run(self):
+     try:
+       self.predictor = UrbanPredictor(name=self.name, cache_dir=self.cache_dir, device=self.device)
+       self.predictor.predict(self.image, w=self.w)
+       with ThrPredictor.plot_lock:
+         self.plot(alpha=0.3)
+     except Exception as e:
+       error(e)
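The new module wires MaskFormer semantic segmentation (facebook/maskformer-swin-base-ade) to a tile split/join pass, relabeling the 150 ADE20K classes into a binary artificial/natural map. Below is a minimal usage sketch, not taken from the package docs: the file name, cache path, and device are illustrative, and note that the module's progress/error logging references info() and error() helpers (elsewhere provided by sciveo.tools.logger) without importing them here.

# Hypothetical usage sketch for the new segmentation module; names are illustrative.
import cv2
from sciveo.ml.images.segmentation import UrbanPredictor

image = cv2.imread("aerial.jpg")                     # BGR image of any resolution
predictor = UrbanPredictor(name="aerial.jpg", cache_dir="/tmp/hf-cache", device="cuda")
predictor.predict(image, w=512)                      # split into 512x512 tiles, segment each, join masks
predictor.plot(alpha=0.5)                            # overlay the artificial/natural color mask

mask = predictor.tile_split.tiles_info["predicted"]  # full-resolution label map (0=artificial, 1=natural)
x, y = predictor.tile_split.get_original_coordinates("tile_0_0", 10, 20)  # map tile coords back to the image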
{sciveo-0.1.39 → sciveo-0.1.41}/sciveo/ml/nlp/embeddings.py
@@ -23,6 +23,7 @@ import numpy as np
  import pandas as pd

  from transformers import AutoTokenizer, AutoModel
+ from sentence_transformers import SentenceTransformer

  from sciveo.tools.logger import *
  from sciveo.ml.base import BaseML
@@ -128,3 +129,24 @@ class TextEmbedding(BaseML):
        embedding = embedding.tolist()
        predictions.append(embedding)
      return predictions
+
+
+ class SentenceEmbedding(TextEmbedding):
+   def __init__(self, model_name='BAAI/bge-m3', cache_dir=None, device=None) -> None:
+     super().__init__(model_name=model_name, cache_dir=cache_dir, device=device)
+     self.model = None
+     self.max_tokens = 8192
+     self.normalize_embeddings = True
+
+   def init(self):
+     if self.model is None:
+       self.model = SentenceTransformer(self.model_name).to(self.device)
+       self.post_init()
+
+   def predict_one(self, x):
+     return self.predict([x])[0]
+
+   def predict(self, X):
+     self.init()
+     predictions = self.model.encode(X, normalize_embeddings=self.normalize_embeddings)
+     return predictions.detach().numpy()
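SentenceEmbedding lazily wraps a sentence_transformers model behind the existing TextEmbedding interface. A hedged usage sketch follows (inputs are illustrative; the model name is the class default). One caveat worth knowing: SentenceTransformer.encode returns a NumPy array under its default convert_to_numpy=True, in which case the trailing .detach().numpy() in predict() would fail, so the sketch calls encode on the loaded model directly.

# Hypothetical usage sketch for the new SentenceEmbedding class.
from sciveo.ml.nlp.embeddings import SentenceEmbedding

emb = SentenceEmbedding(model_name="BAAI/bge-m3", cache_dir=None, device="cpu")
emb.init()  # loads the SentenceTransformer on first use
vectors = emb.model.encode(["a first sentence", "a second one"], normalize_embeddings=True)
print(vectors.shape)  # (2, embedding_dim), rows unit-normalized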
sciveo-0.1.41/sciveo/ml/nlp/tokenizers/bpe.py (new)
@@ -0,0 +1,76 @@
+ #
+ # Pavlin Georgiev, Softel Labs
+ #
+ # This is a proprietary file and may not be copied,
+ # distributed, or modified without express permission
+ # from the owner. For licensing inquiries, please
+ # contact pavlin@softel.bg.
+ #
+ # 2024
+ #
+
+ import re
+
+ from sciveo.tools.logger import *
+
+
+ class BPETokenizer:
+   def __init__(self, max_size):
+     self.initial_tokens = 256
+     self.max_size = max_size
+     self.max_merges = max_size - self.initial_tokens
+     self.vocab = {}
+     self.merges = {}
+
+   def encode(self, text):
+     tokens = list(map(int, text.encode("utf-8")))
+     l1 = len(tokens)
+     for k, v in self.merges.items():
+       self.merge(tokens, k, v)
+     debug(f"encoded ratio {len(tokens) / l1:.2}x")
+     return tokens
+
+   def decode_token(self, token):
+     if token not in self.vocab:
+       return [token]
+
+     bigram = self.vocab[token]
+     return self.decode_token(bigram[0]) + self.decode_token(bigram[1])
+
+   def decode(self, tokens):
+     decoded = []
+     for token in tokens:
+       decoded += self.decode_token(token)
+     return bytes(decoded).decode("utf-8", errors="replace")
+
+   def train(self, text, debug_step=100):
+     tokens = list(map(int, text.encode("utf-8")))
+     token_id = self.initial_tokens
+     debug("max_merges", self.max_merges)
+     while(len(self.merges) < self.max_merges):
+       current_counts = self.counts(tokens)
+       bigram = max(current_counts, key=current_counts.get)
+       self.merge(tokens, bigram, token_id)
+       self.merges[bigram] = token_id
+       self.vocab[token_id] = bigram
+       token_id += 1
+       if len(self.merges) % debug_step == 0:
+         debug("train", f"{len(self.merges)}/{self.max_merges}")
+
+   def fit(self, x):
+     return self.train(x)
+
+   def counts(self, tokens):
+     result = {}
+     for bigram in zip(tokens, tokens[1:]):
+       result.setdefault(bigram, 0)
+       result[bigram] += 1
+     return result
+
+   def merge(self, tokens, bigram, token_id):
+     i = 0
+     while i < len(tokens) - 1:
+       if tokens[i] == bigram[0] and tokens[i + 1] == bigram[1]:
+         tokens[i] = token_id
+         del tokens[i + 1]
+       i += 1
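This is a byte-level BPE tokenizer: training starts from the 256 raw byte values and greedily merges the most frequent adjacent pair until max_size - 256 merges are learned; encode replays the merges in training order, and decode expands merged tokens recursively back to bytes, so any text round-trips. A small sketch (the training text is illustrative; see also test/test_tokenizers.py below):

# Minimal round-trip sketch for the new BPETokenizer.
from sciveo.ml.nlp.tokenizers.bpe import BPETokenizer

corpus = "low lower lowest " * 20 + "newer newest new " * 20  # repetitive text so merges are meaningful
T = BPETokenizer(max_size=280)  # 256 byte tokens + up to 24 learned merges
T.train(corpus)

t = "lower and newer"
ids = T.encode(t)          # fewer ids than UTF-8 bytes once merges apply
assert T.decode(ids) == t  # byte-level BPE round-trips arbitrary text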
sciveo-0.1.41/sciveo/version.py (new)
@@ -0,0 +1,2 @@
+
+ __version__ = '0.1.41'
{sciveo-0.1.39 → sciveo-0.1.41}/sciveo.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sciveo
- Version: 0.1.39
+ Version: 0.1.41
  Description-Content-Type: text/markdown
  Provides-Extra: mon
  Provides-Extra: net
{sciveo-0.1.39 → sciveo-0.1.41}/sciveo.egg-info/SOURCES.txt
@@ -96,10 +96,13 @@ sciveo/ml/images/base.py
  sciveo/ml/images/description.py
  sciveo/ml/images/embeddings.py
  sciveo/ml/images/object_detection.py
+ sciveo/ml/images/segmentation.py
  sciveo/ml/images/tools.py
  sciveo/ml/images/transformers.py
  sciveo/ml/nlp/__init__.py
  sciveo/ml/nlp/embeddings.py
+ sciveo/ml/nlp/tokenizers/__init__.py
+ sciveo/ml/nlp/tokenizers/bpe.py
  sciveo/ml/video/__init__.py
  sciveo/ml/video/description.py
  sciveo/monitoring/__init__.py
@@ -136,4 +139,5 @@ test/test_configuration.py
  test/test_crypto.py
  test/test_monitoring.py
  test/test_runner.py
- test/test_sampling.py
+ test/test_sampling.py
+ test/test_tokenizers.py
{sciveo-0.1.39 → sciveo-0.1.41}/sciveo.egg-info/requires.txt
@@ -54,6 +54,7 @@ torch>=0.0.0
  torchvision>=0.0.0
  diffusers>=0.0.0
  transformers>=0.0.0
+ sentence_transformers>=0.0.0
  accelerate>=0.0.0
  annoy>=0.0.0
  ultralytics>=0.0.0
@@ -65,6 +66,7 @@ torch>=0.0.0
  torchvision>=0.0.0
  diffusers>=0.0.0
  transformers>=0.0.0
+ sentence_transformers>=0.0.0
  accelerate>=0.0.0
  annoy>=0.0.0
  ultralytics>=0.0.0
{sciveo-0.1.39 → sciveo-0.1.41}/setup.py
@@ -29,7 +29,7 @@ extras_require = {
    'media-ml': [
      'tensorflow>=0.0.0', 'keras>=0.0.0',
      'torch>=0.0.0', 'torchvision>=0.0.0',
-     'diffusers>=0.0.0', 'transformers>=0.0.0', 'accelerate>=0.0.0', 'annoy>=0.0.0',
+     'diffusers>=0.0.0', 'transformers>=0.0.0', 'sentence_transformers>=0.0.0', 'accelerate>=0.0.0', 'annoy>=0.0.0',
      'ultralytics>=0.0.0'
    ]
  }
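Since the new sentence_transformers requirement sits in the 'media-ml' extra (and, per the requires.txt hunks above, in a second extras section), it is only installed when the ML extras are requested, e.g.:

pip install 'sciveo[media-ml]==0.1.41'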
sciveo-0.1.41/test/test_tokenizers.py (new)
@@ -0,0 +1,34 @@
+ #
+ # Pavlin Georgiev, Softel Labs
+ #
+ # This is a proprietary file and may not be copied,
+ # distributed, or modified without express permission
+ # from the owner. For licensing inquiries, please
+ # contact pavlin@softel.bg.
+ #
+ # 2024
+ #
+
+ import math
+ import unittest
+
+ from sciveo.ml.nlp.tokenizers.bpe import *
+
+
+ class TestTokenizers(unittest.TestCase):
+   def test_BPE(self):
+     text = "節樂,《漢語大詞典》一則:「《史記.樂書》:凡作樂者,所以節槳。張守義正義:音洛,言不樂至荒淫也, 網站有中、英文版本,也有繁、簡體版,可通過每頁左上角的連結隨時調整 Unicode! 🅤🅝🅘🅒🅞🅓🅔‽ 🇺‌🇳‌🇮‌🇨‌🇴‌🇩‌🇪! 😄 The very name strikes fear and awe into the hearts of programmers worldwide. We all know we ought to “support Unicode” in our software (whatever that means—like using wchar_t for all the strings, right?)"
+     text += "Using a row in the above table to encode a code point less than 'First code point' (thus using more bytes than necessary) is termed an overlong encoding. These are a security problem because they allow the same code point to be encoded in multiple ways. Overlong encodings (of ../ for example) have been used to bypass security validations in high-profile products including Microsoft's IIS web server[14] and Apache's Tomcat servlet container.[15] Overlong encodings should therefore be considered an error and never decoded. Modified UTF-8 allows an overlong encoding of U+0000."
+
+     T = BPETokenizer(max_size=512)
+     T.train(text)
+
+     t = "你好世界,美好的一天"
+     self.assertTrue(T.decode(T.encode(t)) == t)
+
+     t = "hello world and testing"
+     self.assertTrue(T.decode(T.encode(t)) == t)
+
+
+ if __name__ == '__main__':
+   unittest.main()
@@ -1,2 +0,0 @@
1
-
2
- __version__ = '0.1.39'