sciveo 0.1.34__tar.gz → 0.1.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. {sciveo-0.1.34 → sciveo-0.1.35}/PKG-INFO +1 -1
  2. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/cli.py +9 -1
  3. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/pipeline.py +3 -3
  4. sciveo-0.1.35/sciveo/media/pipelines/processors/image/embeddings.py +49 -0
  5. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/generators.py +7 -35
  6. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/video/generators.py +7 -14
  7. sciveo-0.1.35/sciveo/ml/base.py +55 -0
  8. sciveo-0.1.35/sciveo/ml/images/base.py +48 -0
  9. sciveo-0.1.35/sciveo/ml/images/description.py +64 -0
  10. sciveo-0.1.35/sciveo/ml/images/embeddings.py +100 -0
  11. sciveo-0.1.35/sciveo/ml/nlp/embeddings.py +128 -0
  12. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/network/tools.py +1 -1
  13. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/logger.py +3 -1
  14. sciveo-0.1.35/sciveo/version.py +2 -0
  15. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo.egg-info/PKG-INFO +1 -1
  16. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo.egg-info/SOURCES.txt +5 -0
  17. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo.egg-info/requires.txt +2 -2
  18. {sciveo-0.1.34 → sciveo-0.1.35}/setup.py +1 -1
  19. sciveo-0.1.34/sciveo/media/pipelines/processors/image/embeddings.py +0 -80
  20. sciveo-0.1.34/sciveo/version.py +0 -2
  21. {sciveo-0.1.34 → sciveo-0.1.35}/README.md +0 -0
  22. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/__init__.py +0 -0
  23. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/api/__init__.py +0 -0
  24. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/api/base.py +0 -0
  25. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/api/upload.py +0 -0
  26. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/common/__init__.py +0 -0
  27. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/common/configuration.py +0 -0
  28. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/common/model.py +0 -0
  29. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/common/optimizers.py +0 -0
  30. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/common/sampling.py +0 -0
  31. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/content/__init__.py +0 -0
  32. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/content/dataset.py +0 -0
  33. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/content/experiment.py +0 -0
  34. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/content/project.py +0 -0
  35. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/content/runner.py +0 -0
  36. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/__init__.py +0 -0
  37. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/__init__.py +0 -0
  38. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/base.py +0 -0
  39. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/encoders/__init__.py +0 -0
  40. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/encoders/base.py +0 -0
  41. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/encoders/normalizer.py +0 -0
  42. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/nlp/__init__.py +0 -0
  43. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/nlp/search.py +0 -0
  44. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/time_series/__init__.py +0 -0
  45. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/time_series/dataset.py +0 -0
  46. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/time_series/predictor.py +0 -0
  47. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/time_series/trainer.py +0 -0
  48. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/ml/time_series/window_generator.py +0 -0
  49. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/__init__.py +0 -0
  50. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/base.py +0 -0
  51. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/job_daemon.py +0 -0
  52. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/layouts/__init__.py +0 -0
  53. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/layouts/base.py +0 -0
  54. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/postprocessors/__init__.py +0 -0
  55. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/postprocessors/base.py +0 -0
  56. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/postprocessors/default.py +0 -0
  57. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/__init__.py +0 -0
  58. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/audio/__init__.py +0 -0
  59. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/audio/audio.py +0 -0
  60. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/audio/audio_extractor_process.py +0 -0
  61. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/aws.py +0 -0
  62. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/base.py +0 -0
  63. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/file/__init__.py +0 -0
  64. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/file/archive.py +0 -0
  65. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/__init__.py +0 -0
  66. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/album.py +0 -0
  67. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/album_in_image.py +0 -0
  68. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/depth_esimation.py +0 -0
  69. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/filters.py +0 -0
  70. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/histogram.py +0 -0
  71. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/mask.py +0 -0
  72. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/object_detection.py +0 -0
  73. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/resize.py +0 -0
  74. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/segmentation.py +0 -0
  75. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/image/watermark.py +0 -0
  76. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/media_info.py +0 -0
  77. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/nlp/__init__.py +0 -0
  78. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/nlp/address.py +0 -0
  79. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/qr.py +0 -0
  80. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/sci/__init__.py +0 -0
  81. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/sci/base.py +0 -0
  82. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/sci/dataset.py +0 -0
  83. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/sci/time_series/__init__.py +0 -0
  84. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/sci/time_series/predictor.py +0 -0
  85. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/sci/time_series/trainer.py +0 -0
  86. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/tpu_base.py +0 -0
  87. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/video/__init__.py +0 -0
  88. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/video/motion_detection.py +0 -0
  89. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/video/resize.py +0 -0
  90. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/video/video_album.py +0 -0
  91. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/video/video_frames.py +0 -0
  92. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/processors/video/video_resample.py +0 -0
  93. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/queues.py +0 -0
  94. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/server.py +0 -0
  95. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/web/__init__.py +0 -0
  96. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/media/pipelines/web/server.py +0 -0
  97. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/ml/__init__.py +0 -0
  98. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/ml/evaluation/__init__.py +0 -0
  99. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/ml/evaluation/object_detection.py +0 -0
  100. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/ml/images/__init__.py +0 -0
  101. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/ml/images/object_detection.py +0 -0
  102. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/ml/images/tools.py +0 -0
  103. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/ml/images/transforms.py +0 -0
  104. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/ml/nlp/__init__.py +0 -0
  105. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/monitoring/__init__.py +0 -0
  106. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/monitoring/monitor.py +0 -0
  107. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/monitoring/start.py +0 -0
  108. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/network/__init__.py +0 -0
  109. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/network/camera.py +0 -0
  110. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/network/sniffer.py +0 -0
  111. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/__init__.py +0 -0
  112. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/array.py +0 -0
  113. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/aws/__init__.py +0 -0
  114. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/aws/priority_queue.py +0 -0
  115. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/aws/s3.py +0 -0
  116. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/common.py +0 -0
  117. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/compress.py +0 -0
  118. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/configuration.py +0 -0
  119. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/crypto.py +0 -0
  120. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/daemon.py +0 -0
  121. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/formating.py +0 -0
  122. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/hardware.py +0 -0
  123. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/http.py +0 -0
  124. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/os.py +0 -0
  125. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/random.py +0 -0
  126. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/remote.py +0 -0
  127. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/simple_counter.py +0 -0
  128. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/synchronized.py +0 -0
  129. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo/tools/timers.py +0 -0
  130. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo.egg-info/dependency_links.txt +0 -0
  131. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo.egg-info/entry_points.txt +0 -0
  132. {sciveo-0.1.34 → sciveo-0.1.35}/sciveo.egg-info/top_level.txt +0 -0
  133. {sciveo-0.1.34 → sciveo-0.1.35}/setup.cfg +0 -0
  134. {sciveo-0.1.34 → sciveo-0.1.35}/test/test_compress.py +0 -0
  135. {sciveo-0.1.34 → sciveo-0.1.35}/test/test_configuration.py +0 -0
  136. {sciveo-0.1.34 → sciveo-0.1.35}/test/test_crypto.py +0 -0
  137. {sciveo-0.1.34 → sciveo-0.1.35}/test/test_monitoring.py +0 -0
  138. {sciveo-0.1.34 → sciveo-0.1.35}/test/test_runner.py +0 -0
  139. {sciveo-0.1.34 → sciveo-0.1.35}/test/test_sampling.py +0 -0
--- sciveo-0.1.34/PKG-INFO
+++ sciveo-0.1.35/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sciveo
-Version: 0.1.34
+Version: 0.1.35
 Description-Content-Type: text/markdown
 Provides-Extra: mon
 Provides-Extra: net
--- sciveo-0.1.34/sciveo/cli.py
+++ sciveo-0.1.35/sciveo/cli.py
@@ -21,7 +21,13 @@ def main():
   config = GlobalConfiguration.get()

   parser = argparse.ArgumentParser(description='sciveo CLI')
-  parser.add_argument('command', choices=['init', 'monitor', 'scan', 'media-server', 'media-run'], help='Command to execute')
+  parser.add_argument(
+    'command',
+    choices=[
+      'init', 'monitor', 'scan',
+      'media-server', 'media-run'
+    ],
+    help='Command to execute')

   parser.add_argument('--period', type=int, default=120, help='Period in seconds')
   parser.add_argument('--block', type=bool, default=True, help='Block flag')
@@ -36,6 +42,8 @@ def main():
   parser.add_argument('--height', type=str, default=None, help='height')
   parser.add_argument('--rate', type=int, help='Rate number')
   parser.add_argument('--processor', type=str, help='Processor name')
+  parser.add_argument('--src', type=str, default=None, help='Source')
+  parser.add_argument('--dst', type=str, default=None, help='Destination')

   args = parser.parse_args()

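For reference, the new --src/--dst options parse as plain optional strings. A minimal standalone sketch of the parsing behavior (the argument values and the trimmed-down parser below are illustrative, not the full sciveo CLI):

import argparse

# Minimal re-creation of the relevant part of the sciveo CLI parser.
parser = argparse.ArgumentParser(description='sciveo CLI')
parser.add_argument('command', choices=['init', 'monitor', 'scan', 'media-server', 'media-run'], help='Command to execute')
parser.add_argument('--src', type=str, default=None, help='Source')
parser.add_argument('--dst', type=str, default=None, help='Destination')

args = parser.parse_args(['media-run', '--src', './in', '--dst', './out'])
print(args.command, args.src, args.dst)  # -> media-run ./in ./out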
--- sciveo-0.1.34/sciveo/media/pipelines/pipeline.py
+++ sciveo-0.1.35/sciveo/media/pipelines/pipeline.py
@@ -70,7 +70,7 @@ class MediaPipeline:
     "video-frames-extract": VideoFramesExtract,
     "video-motion-detector": VideoMotionDetector,
     "video-album": VideoAlbum,
-    "video-to-text": VideoToText,
+    "video-to-text": VideoToTextProcessor,

     "image-resize": ImageResize,
     "image-histogram": ImageHistogram,
@@ -80,11 +80,11 @@ class MediaPipeline:
     "image-watermark": ImageWatermark,
     "image-diffusion": ImageDiffusionText,
     "image-diffusion-image-text": ImageDiffusionImageText,
-    "image-to-text": ImageToText,
+    "image-to-text": ImageToTextProcessor,
     "image-fgbg-filter": ImageFGBGFilter,
     "image-segmentation": ImageSegmentation,
     "image-depth-estimation": ImageDepthEstimation,
-    "image-embedding": ImageEmbedding,
+    "image-embedding": ImageEmbeddingProcessor,
     "image-object-detection": ImageObjectDetectionProcessor,

     "sci-timeseries-predictor": TimeSeriesPredictorProcessor,
--- /dev/null
+++ sciveo-0.1.35/sciveo/media/pipelines/processors/image/embeddings.py
@@ -0,0 +1,49 @@
+#
+# Pavlin Georgiev, Softel Labs
+#
+# This is a proprietary file and may not be copied,
+# distributed, or modified without express permission
+# from the owner. For licensing inquiries, please
+# contact pavlin@softel.bg.
+#
+# 2024
+#
+
+import os
+import boto3
+import cv2
+from PIL import Image
+
+import torch
+from torchvision import models, transforms
+
+from sciveo.tools.logger import *
+from sciveo.tools.common import *
+from sciveo.media.pipelines.processors.tpu_base import *
+from sciveo.media.pipelines.base import ApiContent
+from sciveo.ml.images.embeddings import ImageEmbedding
+
+
+class ImageEmbeddingProcessor(TPUBaseProcessor):
+  def __init__(self, processor_config, max_progress) -> None:
+    super().__init__(processor_config, max_progress)
+
+    self.default.update({
+      "model_id": 1,
+      "output": False
+    })
+
+    self.predictor = ImageEmbedding(self['model_id'])
+    self.api = ApiContent()
+
+  def process(self, media):
+    debug("process", media['guid'])
+    embedding = self.predictor.predict_one(media)
+    self.api.update(media, {"embedding_resnet_512": list(embedding)})
+    return media
+
+  def content_type(self):
+    return "image"
+
+  def name(self):
+    return "image-embedding"
--- sciveo-0.1.34/sciveo/media/pipelines/processors/image/generators.py
+++ sciveo-0.1.35/sciveo/media/pipelines/processors/image/generators.py
@@ -24,6 +24,7 @@ from sciveo.tools.logger import *
 from sciveo.tools.common import *
 from sciveo.media.pipelines.processors.tpu_base import *
 from sciveo.media.pipelines.base import ApiContent
+from sciveo.ml.images.description import ImageToText


 class ImageDiffusionText(TPUBaseProcessor):
@@ -150,14 +151,13 @@ class ImageDiffusionText(TPUBaseProcessor):
     return False


-class ImageToText(TPUBaseProcessor):
+class ImageToTextProcessor(TPUBaseProcessor):
   def __init__(self, processor_config, max_progress) -> None:
     super().__init__(processor_config, max_progress)

     self.api = ApiContent()

-    cache_dir = os.path.join(os.environ['MEDIA_MODELS_BASE_PATH'], "models/")
-
+    self.cache_dir = os.path.join(os.environ['MEDIA_MODELS_BASE_PATH'], "models/")
     self.device = os.environ.get("MEDIA_PROCESSING_BACKEND", "cpu")

     self.default.update({
@@ -166,44 +166,16 @@ class ImageToText(TPUBaseProcessor):
       "output": False
     })

-    self.models = [
-      ["GIT", "softel/git-base-v1.0", "auto"],
-      ["GIT", "softel/git-large-v1.0", "auto"],
-      ["BLIP2", "softel/blip2-opt-2.7b-v1.0", torch.float16],
-      # ["BLIP2", "softel/blip2-opt-6.7b-v1.0", torch.float16],
-    ]
-
-    model_config = self.models[self['model_id']]
-    self.dtype = model_config[2]
-
-    if model_config[0] == "GIT":
-      self.pipe = AutoProcessor.from_pretrained(model_config[1], cache_dir=cache_dir)
-      self.model = AutoModelForCausalLM.from_pretrained(model_config[1], torch_dtype=self.dtype, cache_dir=cache_dir).to(self.device)
-    elif model_config[0] == "BLIP2":
-      self.pipe = Blip2Processor.from_pretrained(model_config[1], cache_dir=cache_dir)
-      self.model = Blip2ForConditionalGeneration.from_pretrained(model_config[1], torch_dtype=self.dtype, device_map="auto", cache_dir=cache_dir)
-
-    debug("model name", model_config[1], "on device", self.device, "dtype", self.dtype, self.model.dtype)
-    self.dtype = self.model.dtype
+    self.predictor = None

   def process(self, media):
     debug("process", media['guid'])
+    if self.predictor is None:
+      self.predictor = ImageToText(self['model_id'], self['max_length'], self.cache_dir, self.device)
     local_path = media["local_path"]
-    frame = cv2.cvtColor(cv2.imread(local_path), cv2.COLOR_BGR2RGB)
-    predict = self.predict_image_text(frame)
+    predict = self.predictor.predict_one(local_path)
     return self.set_media(media, predict)

-  def predict_image_text(self, frame):
-    pixel_values = self.pipe(images=frame, return_tensors="pt").pixel_values.to(self.device, self.dtype)
-    ids = self.model.generate(pixel_values=pixel_values, max_length=self["max_length"])
-    predict = self.pipe.batch_decode(ids, skip_special_tokens=True)[0]
-
-    del ids
-    del pixel_values
-
-    debug("process predict", predict)
-    return predict
-
   def set_media(self, media, predict):
     media.setdefault("next", [])
     media["next"].append({
--- sciveo-0.1.34/sciveo/media/pipelines/processors/video/generators.py
+++ sciveo-0.1.35/sciveo/media/pipelines/processors/video/generators.py
@@ -19,28 +19,21 @@ from transformers import AutoProcessor, AutoModelForCausalLM

 from sciveo.tools.common import *
 from sciveo.media.pipelines.processors.tpu_base import *
-from sciveo.media.pipelines.processors.image.generators import ImageToText
+from sciveo.media.pipelines.processors.image.generators import ImageToTextProcessor
 from sciveo.media.pipelines.base import ApiContent
+from sciveo.ml.video.description import VideoToText


-class VideoToText(ImageToText):
+class VideoToTextProcessor(ImageToTextProcessor):
   def __init__(self, processor_config, max_progress) -> None:
     super().__init__(processor_config, max_progress)

   def process(self, media):
     debug("process", media['guid'])
-    local_path = media["local_path"]
-
-    cap = cv2.VideoCapture(local_path)
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
-    ret, frame = cap.read()
-    cap.release()
-    if ret:
-      predict = self.predict_image_text(frame)
-      return self.set_media(media, predict)
-    else:
-      return media
+    if self.predictor is None:
+      self.predictor = VideoToText(self['model_id'], self['max_length'], self.cache_dir, self.device)
+    prediction = self.predictor.predict_one(media["local_path"])
+    return self.set_media(media, prediction)

   def content_type(self):
     return "video"
--- /dev/null
+++ sciveo-0.1.35/sciveo/ml/base.py
@@ -0,0 +1,55 @@
+#
+# Pavlin Georgiev, Softel Labs
+#
+# This is a proprietary file and may not be copied,
+# distributed, or modified without express permission
+# from the owner. For licensing inquiries, please
+# contact pavlin@softel.bg.
+#
+# 2023
+#
+
+import os
+
+from sciveo.tools.logger import *
+from sciveo.tools.configuration import GlobalConfiguration
+
+
+class BaseML:
+  def __init__(self, model_name="base", cache_dir=None, device=None) -> None:
+    self.model_name = model_name
+    self.config = GlobalConfiguration.get()
+
+    if cache_dir is None:
+      self.cache_dir = self.config['MODELS_BASE_PATH']
+    else:
+      self.cache_dir = cache_dir
+
+    if device is None:
+      self.device = os.environ.get("MEDIA_PROCESSING_BACKEND", "cpu")
+    else:
+      self.device = device
+
+    self.init_models()
+
+  def init_models(self):
+    warning("init_models not implemented")
+
+  def init(self):
+    warning("init not implemented")
+
+  def post_init(self):
+    warning("post_init not implemented")
+
+  def predict_one(self, x):
+    warning("predict_one not implemented")
+
+  def predict(self, X):
+    predictions = []
+    for x in X:
+      predictions.append(self.predict_one(x))
+    return predictions
+
+  def train(self, X, Y_true=None):
+    warning("train not implemented")
+
+  def describe(self):
+    return f"{type(self).__name__}: {self.model_name}"
--- /dev/null
+++ sciveo-0.1.35/sciveo/ml/images/base.py
@@ -0,0 +1,48 @@
+#
+# Pavlin Georgiev, Softel Labs
+#
+# This is a proprietary file and may not be copied,
+# distributed, or modified without express permission
+# from the owner. For licensing inquiries, please
+# contact pavlin@softel.bg.
+#
+# 2023
+#
+
+import io
+import base64
+import numpy as np
+from PIL import Image
+
+from sciveo.tools.logger import *
+from sciveo.tools.configuration import GlobalConfiguration
+from sciveo.ml.base import BaseML
+
+
+class BaseImageML(BaseML):
+  def __init__(self, model_name=1, cache_dir=None, device=None) -> None:
+    super().__init__(model_name, cache_dir, device)
+
+  def load_image(self, x):
+    if isinstance(x, Image.Image) or isinstance(x, np.ndarray):
+      image = x
+    elif isinstance(x, dict):
+      image = Image.open(x["local_path"])
+    elif isinstance(x, str):
+      if x.startswith("data:image"):  # Base64-encoded data URL?
+        image = Image.open(io.BytesIO(base64.b64decode(x.split(",", 1)[1])))
+      else:
+        image = Image.open(x)
+    else:
+      warning("unknown image format")
+      image = x
+    return image
+
+  def load(self, X):
+    images = []
+    for x in X:
+      images.append(self.load_image(x))
+    return images
+
+  def predict_one(self, x):
+    return self.predict([x])[0]
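load_image() normalizes several input shapes to a PIL image. For reference, all of the following would be accepted (file paths are illustrative; ImageToText stands in for any BaseImageML subclass):

import numpy as np
from PIL import Image
from sciveo.ml.images.description import ImageToText

ml = ImageToText(model_id=0)
ml.load_image(Image.new("RGB", (8, 8)))       # PIL image: passed through
ml.load_image(np.zeros((8, 8, 3), np.uint8))  # numpy array: passed through
ml.load_image({"local_path": "cat.jpg"})      # media dict: opened from local_path
ml.load_image("cat.jpg")                      # plain path: opened directly
# strings starting with "data:image" are treated as base64 data URLs:
# decoded, then opened from memory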
--- /dev/null
+++ sciveo-0.1.35/sciveo/ml/images/description.py
@@ -0,0 +1,64 @@
+#
+# Pavlin Georgiev, Softel Labs
+#
+# This is a proprietary file and may not be copied,
+# distributed, or modified without express permission
+# from the owner. For licensing inquiries, please
+# contact pavlin@softel.bg.
+#
+# 2024
+#
+
+import os
+import cv2
+import gc
+from PIL import Image
+
+import torch
+from diffusers import StableDiffusionPipeline, LMSDiscreteScheduler
+from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
+from transformers import AutoProcessor, AutoModelForCausalLM
+from transformers import Blip2Processor, Blip2ForConditionalGeneration
+
+from sciveo.tools.logger import *
+from sciveo.tools.common import *
+from sciveo.ml.images.base import BaseImageML
+
+
+class ImageToText(BaseImageML):
+  def __init__(self, model_id, max_length=64, cache_dir=None, device=None) -> None:
+    super().__init__(model_id, cache_dir, device)
+    self.max_length = max_length
+
+    self.models = [
+      ["GIT", "softel/git-base-v1.0", "auto"],
+      ["GIT", "softel/git-large-v1.0", "auto"],
+      ["BLIP2", "softel/blip2-opt-2.7b-v1.0", torch.float16],
+      # ["BLIP2", "softel/blip2-opt-6.7b-v1.0", torch.float16],
+    ]
+
+    model_config = self.models[model_id]
+    self.dtype = model_config[2]
+
+    if model_config[0] == "GIT":
+      self.pipe = AutoProcessor.from_pretrained(model_config[1], cache_dir=self.cache_dir)
+      self.model = AutoModelForCausalLM.from_pretrained(model_config[1], torch_dtype=self.dtype, cache_dir=self.cache_dir).to(self.device)
+    elif model_config[0] == "BLIP2":
+      self.pipe = Blip2Processor.from_pretrained(model_config[1], cache_dir=self.cache_dir)
+      self.model = Blip2ForConditionalGeneration.from_pretrained(model_config[1], torch_dtype=self.dtype, device_map="auto", cache_dir=self.cache_dir)
+
+    debug("model name", model_config[1], "on device", self.device, "dtype", self.dtype, self.model.dtype)
+    self.dtype = self.model.dtype
+
+  def predict(self, images):
+    images = self.load(images)
+
+    pixel_values = self.pipe(images=images, return_tensors="pt").pixel_values.to(self.device, self.dtype)
+    ids = self.model.generate(pixel_values=pixel_values, max_length=self.max_length)
+    prediction = self.pipe.batch_decode(ids, skip_special_tokens=True)
+
+    del ids
+    del pixel_values
+
+    # debug("image description", prediction)
+    return prediction
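A usage sketch of the extracted class, assuming model_id=0 selects the GIT base entry from the model table above (the image path is illustrative):

from sciveo.ml.images.description import ImageToText

captioner = ImageToText(model_id=0, max_length=64)
# predict_one() comes from BaseImageML and wraps the batched predict().
caption = captioner.predict_one("holiday-photo.jpg")
print(caption)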
--- /dev/null
+++ sciveo-0.1.35/sciveo/ml/images/embeddings.py
@@ -0,0 +1,100 @@
+#
+# Pavlin Georgiev, Softel Labs
+#
+# This is a proprietary file and may not be copied,
+# distributed, or modified without express permission
+# from the owner. For licensing inquiries, please
+# contact pavlin@softel.bg.
+#
+# 2024
+#
+
+import os
+import boto3
+import cv2
+import io
+import base64
+from PIL import Image
+
+import torch
+from torchvision import models, transforms
+
+import numpy as np
+import pandas as pd
+
+from transformers import AutoTokenizer, AutoModel
+
+from sciveo.tools.logger import *
+from sciveo.ml.images.base import BaseImageML
+
+
+class ImageEmbedding(BaseImageML):
+  def __init__(self, model_id=1, cache_dir=None, device=None) -> None:
+    super().__init__(model_id, cache_dir=cache_dir, device=device)
+
+  def init_models(self):
+    self.model_name = [
+      "softel-resnet18-embedding.pth",
+      "softel-resnet34-embedding.pth",
+      "softel-resnet50-embedding.pth",
+      "softel-resnet101-embedding.pth",
+      "softel-resnet152-embedding.pth",
+    ][int(self.model_name)]
+    self.model_path = os.path.join(self.cache_dir, self.model_name)
+    if os.path.isfile(self.model_path):
+      debug(self.model_name, "available", self.model_path)
+    else:
+      debug("DWN", self.model_name)
+      s3 = boto3.client('s3')
+      s3.download_file("sciveo-model", self.model_name, self.model_path)
+
+    self.preprocessor = transforms.Compose([
+      transforms.Lambda(lambda img: img.convert("RGB") if img.mode != "RGB" else img),
+      transforms.Resize(256),
+      transforms.CenterCrop(224),
+      transforms.ToTensor(),
+      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    ])
+
+    self.model = None
+
+  def load_model(self):
+    debug("loading model", self.model_name, self.model_path)
+    self.model = torch.load(self.model_path).to(self.device)
+    self.model.eval()
+
+  def init(self):
+    if self.model is None:
+      self.load_model()
+      self.post_init()
+
+  def embed(self, image):
+    image_tensor = self.preprocessor(image).unsqueeze(0).to(self.device)
+    with torch.no_grad():
+      embedding = self.model(image_tensor).to("cpu")
+    return embedding.squeeze().numpy()
+
+  def read_image(self, x):
+    if isinstance(x, dict):
+      if "local_path" in x:
+        return Image.open(x["local_path"])
+      elif "guid" in x:
+        debug("not implemented guid", x["guid"])
+      else:
+        debug("not implemented", x)
+    else:
+      return Image.open(io.BytesIO(base64.b64decode(x)))
+
+  # TODO: should conform to the BaseImageML.predict_one() on image input
+  def predict_one(self, x):
+    self.init()
+    image = self.read_image(x)
+    return self.embed(image)
+
+  def predict(self, X):
+    self.init()
+    predictions = []
+    for current_x in X:
+      embedding = self.predict_one(current_x).tolist()
+      predictions.append(embedding)
+    return predictions
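A usage sketch; judging by the processor's embedding_resnet_512 key, the default model_id=1 (ResNet-34) is expected to produce 512-dimensional vectors, which makes cosine similarity the natural comparison (paths are illustrative):

import numpy as np
from sciveo.ml.images.embeddings import ImageEmbedding

emb = ImageEmbedding(model_id=1)  # softel-resnet34-embedding.pth
v1 = emb.predict_one({"local_path": "a.jpg"})
v2 = emb.predict_one({"local_path": "b.jpg"})

# Cosine similarity between the two image embeddings.
cos = float(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
print(f"similarity: {cos:.3f}")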
--- /dev/null
+++ sciveo-0.1.35/sciveo/ml/nlp/embeddings.py
@@ -0,0 +1,128 @@
+#
+# Pavlin Georgiev, Softel Labs
+#
+# This is a proprietary file and may not be copied,
+# distributed, or modified without express permission
+# from the owner. For licensing inquiries, please
+# contact pavlin@softel.bg.
+#
+# 2024
+#
+
+import os
+import boto3
+import cv2
+import io
+import base64
+from PIL import Image
+
+import torch
+from torchvision import models, transforms
+
+import numpy as np
+import pandas as pd
+
+from transformers import AutoTokenizer, AutoModel
+
+from sciveo.tools.logger import *
+from sciveo.ml.base import BaseML
+
+
+class TextEmbedding(BaseML):
+  def __init__(self, model_name='softel/sentence-base-v0.3', cache_dir=None, device=None) -> None:
+    super().__init__(model_name=model_name, cache_dir=cache_dir, device=device)
+    self.pipe = None
+    self.tokenizer = None
+    self.max_tokens = 512
+    self.overlap = 128
+    self.hidden = False
+
+  def load_model(self):
+    debug("loading model", self.model_name)
+    self.pipe = AutoModel.from_pretrained(
+      self.model_name,
+      cache_dir=self.cache_dir
+    )#.to(self.device)
+    self.tokenizer = AutoTokenizer.from_pretrained(
+      self.model_name,
+      cache_dir=self.cache_dir
+    )
+
+  def init(self):
+    if self.pipe is None or self.tokenizer is None:
+      self.load_model()
+      self.post_init()
+
+  def get_tokens_count(self, text):
+    return len(self.tokenizer(text, padding=False, truncation=False, return_tensors=None)['input_ids'])
+
+  def chunk_text(self, text):
+    tokens = self.tokenizer(text, padding=False, truncation=False, return_tensors=None)['input_ids']
+    chunks = []
+    for i in range(0, len(tokens), self.max_tokens - self.overlap):
+      chunk = tokens[i:i + self.max_tokens]
+      chunks.append(self.tokenizer.decode(chunk, skip_special_tokens=True))
+      if len(chunk) < self.max_tokens:
+        break
+    return chunks
+
+  def get_chunked_embeddings(self, text):
+    chunks = self.chunk_text(text)
+    embeddings = []
+    for chunk in chunks:
+      embedding = np.array(self.get_sentence_embedding(chunk))
+      embeddings.append(embedding)
+    return np.mean(np.array(embeddings), axis=0)
+
+  def get_sentence_embedding(self, text):
+    while True:
+      try:
+        return self.embed(text)
+      except Exception:
+        # Retry with a slightly shorter text when the model rejects the input.
+        text = text[:len(text) - 8]
+
+  def get_embedding(self, text):
+    total_tokens = self.get_tokens_count(text)
+    if total_tokens <= self.max_tokens:
+      return self.get_sentence_embedding(text).tolist()
+    else:
+      return self.get_chunked_embeddings(text).tolist()
+
+  def get_chunks_embedding(self, chunks):
+    embeddings = []
+    for text in chunks:
+      embeddings.append(self.get_embedding(text))
+    embeddings = np.array(embeddings)
+    return np.mean(embeddings, axis=0).tolist()
+
+  def embed(self, text):
+    inputs = self.tokenizer.encode_plus(text, return_tensors='pt')#.to(self.device)
+
+    if self.hidden:
+      with torch.no_grad():
+        last_hidden_state = self.pipe(**inputs, output_hidden_states=True).hidden_states[-1]
+      weights_for_non_padding = inputs['attention_mask'] * torch.arange(start=1, end=last_hidden_state.shape[1] + 1).unsqueeze(0)
+      sum_embeddings = torch.sum(last_hidden_state * weights_for_non_padding.unsqueeze(-1), dim=1)
+      num_of_none_padding_tokens = torch.sum(weights_for_non_padding, dim=-1).unsqueeze(-1)
+      embeddings = sum_embeddings / num_of_none_padding_tokens
+    else:
+      outputs = self.pipe(**inputs)
+      sequence_output = outputs[0]
+      input_mask_expanded = inputs['attention_mask'].unsqueeze(-1).expand(sequence_output.size()).float()
+      embeddings = torch.sum(sequence_output * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+    return embeddings.detach().numpy()[0]
+
+  def predict_one(self, x):
+    self.init()
+    if isinstance(x, list):
+      return self.get_chunks_embedding(x)
+    else:
+      return self.get_embedding(x)
+
+  def predict(self, X):
+    self.init()
+    predictions = []
+    for current_x in X:
+      predictions.append(self.predict_one(current_x))
+    return predictions
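A usage sketch: texts within max_tokens (512) are embedded directly, longer texts are split into overlapping chunks (128-token overlap) whose embeddings are mean-pooled, and a list input is treated as pre-chunked text. The model is downloaded lazily on first use:

import numpy as np
from sciveo.ml.nlp.embeddings import TextEmbedding

embedder = TextEmbedding()  # default model 'softel/sentence-base-v0.3'
e1 = np.array(embedder.predict_one("a short sentence"))
e2 = np.array(embedder.predict_one("another short sentence"))
print(e1.shape)  # embedding dimensionality of the underlying model
print(float(np.dot(e1, e2) / (np.linalg.norm(e1) * np.linalg.norm(e2))))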
--- sciveo-0.1.34/sciveo/network/tools.py
+++ sciveo-0.1.35/sciveo/network/tools.py
@@ -76,7 +76,7 @@ class NetworkTools:
     if self.arguments["localhost"]:
       self.scan_port_hosts(["127.0.0.1"], port)
     self.data["scan"][port].sort(key=lambda ip: int(ip.split('.')[-1]))
-    debug(f"scan_port [{port}] elapsed time {t.stop():.1f}s", self.data["scan"][port])
+    info(f"scan_port [{port}] elapsed time {t.stop():.1f}s", self.data["scan"][port])
     return self.data["scan"][port]

   def scan_port_hosts(self, list_ip, port=22):
--- sciveo-0.1.34/sciveo/tools/logger.py
+++ sciveo-0.1.35/sciveo/tools/logger.py
@@ -29,7 +29,9 @@ def _sciveo_get_logger(name):
   with _sciveo_log_lock:
     if not logger.hasHandlers():
       log_min_level = logging.getLevelName(_sciveo_log_min_level)
-      if (isinstance(log_min_level, str) and log_min_level.startswith("Level")) or isinstance(log_min_level, int):
+      if (isinstance(log_min_level, str) and log_min_level.startswith("Level")):
+        log_min_level = "DEBUG"
+      if isinstance(log_min_level, int) and log_min_level < 10:
         log_min_level = "DEBUG"
       logger.setLevel(log_min_level)

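For context on this change: logging.getLevelName() maps known level names to numbers, unknown names to a "Level <name>" string, and "NOTSET" to 0; the patched guard now sends both the unknown-name case and the below-DEBUG case to DEBUG:

import logging

logging.getLevelName("INFO")    # -> 20: known names map to numeric levels
logging.getLevelName("FOO")     # -> 'Level FOO': unknown names come back as strings
logging.getLevelName("NOTSET")  # -> 0: below DEBUG (10), hence the new `< 10` guard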
--- /dev/null
+++ sciveo-0.1.35/sciveo/version.py
@@ -0,0 +1,2 @@
+
+__version__ = '0.1.35'
--- sciveo-0.1.34/sciveo.egg-info/PKG-INFO
+++ sciveo-0.1.35/sciveo.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sciveo
-Version: 0.1.34
+Version: 0.1.35
 Description-Content-Type: text/markdown
 Provides-Extra: mon
 Provides-Extra: net
--- sciveo-0.1.34/sciveo.egg-info/SOURCES.txt
+++ sciveo-0.1.35/sciveo.egg-info/SOURCES.txt
@@ -88,13 +88,18 @@ sciveo/media/pipelines/processors/video/video_resample.py
 sciveo/media/pipelines/web/__init__.py
 sciveo/media/pipelines/web/server.py
 sciveo/ml/__init__.py
+sciveo/ml/base.py
 sciveo/ml/evaluation/__init__.py
 sciveo/ml/evaluation/object_detection.py
 sciveo/ml/images/__init__.py
+sciveo/ml/images/base.py
+sciveo/ml/images/description.py
+sciveo/ml/images/embeddings.py
 sciveo/ml/images/object_detection.py
 sciveo/ml/images/tools.py
 sciveo/ml/images/transforms.py
 sciveo/ml/nlp/__init__.py
+sciveo/ml/nlp/embeddings.py
 sciveo/monitoring/__init__.py
 sciveo/monitoring/monitor.py
 sciveo/monitoring/start.py