sciveo 0.1.38__tar.gz → 0.1.40__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sciveo-0.1.38 → sciveo-0.1.40}/PKG-INFO +2 -1
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/pipeline.py +1 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/generators.py +51 -1
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/images/description.py +47 -5
- sciveo-0.1.40/sciveo/ml/nlp/tokenizers/bpe.py +76 -0
- sciveo-0.1.40/sciveo/tools/aws/__init__.py +0 -0
- sciveo-0.1.40/sciveo/tools/complexity.py +75 -0
- sciveo-0.1.40/sciveo/version.py +2 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo.egg-info/PKG-INFO +2 -1
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo.egg-info/SOURCES.txt +6 -1
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo.egg-info/requires.txt +4 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/setup.py +5 -1
- sciveo-0.1.40/test/test_complexity.py +36 -0
- sciveo-0.1.40/test/test_tokenizers.py +34 -0
- sciveo-0.1.38/sciveo/version.py +0 -2
- {sciveo-0.1.38 → sciveo-0.1.40}/README.md +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/api/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/api/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/api/upload.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/cli.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/common/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/common/configuration.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/common/model.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/common/optimizers.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/common/sampling.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/content/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/content/dataset.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/content/experiment.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/content/project.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/content/runner.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/encoders/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/encoders/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/encoders/normalizer.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/nlp/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/nlp/search.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/time_series/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/time_series/dataset.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/time_series/predictor.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/time_series/trainer.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/ml/time_series/window_generator.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/job_daemon.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/layouts/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/layouts/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/postprocessors/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/postprocessors/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/postprocessors/default.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/audio/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/audio/audio.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/audio/audio_extractor_process.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/aws.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/file/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/file/archive.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/album.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/album_in_image.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/depth_esimation.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/embeddings.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/filters.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/histogram.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/mask.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/object_detection.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/resize.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/segmentation.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/image/watermark.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/media_info.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/nlp/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/nlp/address.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/qr.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/sci/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/sci/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/sci/dataset.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/sci/time_series/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/sci/time_series/predictor.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/sci/time_series/trainer.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/tpu_base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/video/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/video/generators.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/video/motion_detection.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/video/resize.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/video/video_album.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/video/video_frames.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/video/video_resample.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/queues.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/server.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/web/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/web/server.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/evaluation/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/evaluation/object_detection.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/images/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/images/base.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/images/embeddings.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/images/object_detection.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/images/tools.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/images/transformers.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/nlp/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/nlp/embeddings.py +0 -0
- {sciveo-0.1.38/sciveo/ml/video → sciveo-0.1.40/sciveo/ml/nlp/tokenizers}/__init__.py +0 -0
- {sciveo-0.1.38/sciveo/monitoring → sciveo-0.1.40/sciveo/ml/video}/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/ml/video/description.py +0 -0
- {sciveo-0.1.38/sciveo/network → sciveo-0.1.40/sciveo/monitoring}/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/monitoring/monitor.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/monitoring/start.py +0 -0
- {sciveo-0.1.38/sciveo/tools → sciveo-0.1.40/sciveo/network}/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/network/camera.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/network/sniffer.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/network/tools.py +0 -0
- {sciveo-0.1.38/sciveo/tools/aws → sciveo-0.1.40/sciveo/tools}/__init__.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/array.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/aws/priority_queue.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/aws/s3.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/common.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/compress.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/configuration.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/crypto.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/daemon.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/formating.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/hardware.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/http.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/logger.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/os.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/random.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/remote.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/simple_counter.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/synchronized.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo/tools/timers.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo.egg-info/dependency_links.txt +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo.egg-info/entry_points.txt +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/sciveo.egg-info/top_level.txt +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/setup.cfg +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/test/test_compress.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/test/test_configuration.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/test/test_crypto.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/test/test_monitoring.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/test/test_runner.py +0 -0
- {sciveo-0.1.38 → sciveo-0.1.40}/test/test_sampling.py +0 -0
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sciveo
|
|
3
|
-
Version: 0.1.38
|
|
3
|
+
Version: 0.1.40
|
|
4
4
|
Description-Content-Type: text/markdown
|
|
5
5
|
Provides-Extra: mon
|
|
6
6
|
Provides-Extra: net
|
|
7
7
|
Provides-Extra: media
|
|
8
8
|
Provides-Extra: media-server
|
|
9
9
|
Provides-Extra: media-ml
|
|
10
|
+
Provides-Extra: media-ml-all
|
|
10
11
|
Provides-Extra: all
|
|
11
12
|
Provides-Extra: media-all
|
|
12
13
|
|
|
@@ -81,6 +81,7 @@ class MediaPipeline:
|
|
|
81
81
|
"image-diffusion": ImageDiffusionText,
|
|
82
82
|
"image-diffusion-image-text": ImageDiffusionImageText,
|
|
83
83
|
"image-to-text": ImageToTextProcessor,
|
|
84
|
+
"image-query": ImageQueryProcessor,
|
|
84
85
|
"image-fgbg-filter": ImageFGBGFilter,
|
|
85
86
|
"image-segmentation": ImageSegmentation,
|
|
86
87
|
"image-depth-estimation": ImageDepthEstimation,
|
|
@@ -24,7 +24,7 @@ from sciveo.tools.logger import *
|
|
|
24
24
|
from sciveo.tools.common import *
|
|
25
25
|
from sciveo.media.pipelines.processors.tpu_base import *
|
|
26
26
|
from sciveo.media.pipelines.base import ApiContent
|
|
27
|
-
from sciveo.ml.images.description import ImageToText
|
|
27
|
+
from sciveo.ml.images.description import ImageToText, ImageQuery
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class ImageDiffusionText(TPUBaseProcessor):
|
|
@@ -202,6 +202,56 @@ class ImageToTextProcessor(TPUBaseProcessor):
|
|
|
202
202
|
return True
|
|
203
203
|
|
|
204
204
|
|
|
205
|
+
class ImageQueryProcessor(TPUBaseProcessor):
    """Pipeline processor that asks a text query about an image.

    The answer returned by ``ImageQuery.predict_one`` is appended to
    ``media["next"]`` as a child entry with ``content_type`` "comment".
    """

    def __init__(self, processor_config, max_progress) -> None:
        super().__init__(processor_config, max_progress)

        self.api = ApiContent()

        # MEDIA_MODELS_BASE_PATH is mandatory (KeyError when unset);
        # the processing backend falls back to "cpu".
        models_root = os.environ['MEDIA_MODELS_BASE_PATH']
        self.cache_dir = os.path.join(models_root, "models/")
        self.device = os.environ.get("MEDIA_PROCESSING_BACKEND", "cpu")

        processor_defaults = {
            "max_length": 64,
            "model_id": 0,
            "query": "describe the image",
            "output": False
        }
        self.default.update(processor_defaults)

        # The model is created lazily on the first process() call.
        self.predictor = None

    def process(self, media):
        """Run the configured query against ``media["local_path"]`` and attach the answer."""
        debug("process", media['guid'])
        if self.predictor is None:
            self.predictor = ImageQuery(
                self['query'],
                self['model_id'],
                self['max_length'],
                self.cache_dir,
                self.device,
            )
        answer = self.predictor.predict_one(media["local_path"])
        return self.set_media(media, answer)

    def set_media(self, media, predict):
        """Append a comment entry holding ``predict`` to ``media["next"]`` and return ``media``."""
        children = media.setdefault("next", [])
        children.append({
            "guid": f"TXT-{media['guid']}",
            "parent": media['guid'],
            "content_type": "comment",
            "content_text": predict,
            "owner": media["owner"],
            "name": f"{predict} [{self['max_length']}]",
            "processor": self.name()
        })
        return media

    def content_type(self):
        """Return the content type this processor is declared for."""
        return "image"

    def name(self):
        """Return the processor name, matching the "image-query" pipeline key."""
        return "image-query"

    def is_append_processor(self):
        """Return True: this processor reports itself as an append processor."""
        return True
|
|
253
|
+
|
|
254
|
+
|
|
205
255
|
class ImageDiffusionImageText(TPUBaseProcessor):
|
|
206
256
|
def __init__(self, processor_config, max_progress) -> None:
|
|
207
257
|
super().__init__(processor_config, max_progress)
|
|
@@ -33,12 +33,14 @@ class ImageToText(BaseImageML):
|
|
|
33
33
|
self.models = [
|
|
34
34
|
["GIT", "softel/git-base-v1.0", "auto"],
|
|
35
35
|
["GIT", "softel/git-large-v1.0", "auto"],
|
|
36
|
+
["CAPTION", "softel/image-instruct-v2.0", "auto"],
|
|
36
37
|
["BLIP2", "softel/blip2-opt-2.7b-v1.0", torch.float16],
|
|
37
38
|
# ["BLIP2", "softel/blip2-opt-6.7b-v1.0", torch.float16],
|
|
38
39
|
]
|
|
39
40
|
|
|
40
41
|
model_config = self.models[model_id]
|
|
41
42
|
self.dtype = model_config[2]
|
|
43
|
+
self.model_type = model_config[0]
|
|
42
44
|
|
|
43
45
|
if model_config[0] == "GIT":
|
|
44
46
|
self.pipe = AutoProcessor.from_pretrained(model_config[1], cache_dir=self.cache_dir)
|
|
@@ -46,6 +48,9 @@ class ImageToText(BaseImageML):
|
|
|
46
48
|
elif model_config[0] == "BLIP2":
|
|
47
49
|
self.pipe = Blip2Processor.from_pretrained(model_config[1], cache_dir=self.cache_dir)
|
|
48
50
|
self.model = Blip2ForConditionalGeneration.from_pretrained(model_config[1], torch_dtype=self.dtype, device_map="auto", cache_dir=self.cache_dir)
|
|
51
|
+
elif model_config[0] == "CAPTION":
|
|
52
|
+
self.model = AutoModelForCausalLM.from_pretrained(model_config[1], torch_dtype=self.dtype, device_map="auto", cache_dir=self.cache_dir, trust_remote_code=True)
|
|
53
|
+
self.pipe = self.model
|
|
49
54
|
|
|
50
55
|
debug("model name", model_config[1], "on device", self.device, "dtype", self.dtype, self.model.dtype)
|
|
51
56
|
self.dtype = self.model.dtype
|
|
@@ -53,12 +58,49 @@ class ImageToText(BaseImageML):
|
|
|
53
58
|
def predict(self, images):
|
|
54
59
|
images = self.load(images)
|
|
55
60
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
61
|
+
if self.model_type == "CAPTION":
|
|
62
|
+
prediction = []
|
|
63
|
+
if self.max_length < 64:
|
|
64
|
+
predict_length = "short"
|
|
65
|
+
else:
|
|
66
|
+
predict_length = "normal"
|
|
67
|
+
for image in images:
|
|
68
|
+
prediction.append(self.model.caption(image, length=predict_length))
|
|
69
|
+
else:
|
|
70
|
+
pixel_values = self.pipe(images=images, return_tensors="pt").pixel_values.to(self.device, self.dtype)
|
|
71
|
+
ids = self.model.generate(pixel_values=pixel_values, max_length=self.max_length)
|
|
72
|
+
prediction = self.pipe.batch_decode(ids, skip_special_tokens=True)
|
|
59
73
|
|
|
60
|
-
|
|
61
|
-
|
|
74
|
+
del ids
|
|
75
|
+
del pixel_values
|
|
62
76
|
|
|
63
77
|
# debug("image description", prediction)
|
|
64
78
|
return prediction
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ImageQuery(BaseImageML):
    """Answer a fixed text query about an image with an instruction-tuned model."""

    def __init__(self, query, model_id, max_length=64, cache_dir=None, device=None) -> None:
        super().__init__(model_id, cache_dir, device)
        self.query = query
        # Stored for parity with ImageToText; predict_one() below does not read it.
        self.max_length = max_length

        # Each entry is [model type, checkpoint id, torch dtype]; model_id indexes this list.
        self.models = [
            ["CAPTION", "softel/image-instruct-v2.0", "auto"],
            # ["BLIP2", "softel/blip2-opt-2.7b-v1.0", torch.float16],
            # ["BLIP2", "softel/blip2-opt-6.7b-v1.0", torch.float16],
        ]

        model_type, model_name, requested_dtype = self.models[model_id]
        self.dtype = requested_dtype
        self.model_type = model_type

        if model_type == "CAPTION":
            # NOTE(review): trust_remote_code=True runs Python code shipped with the
            # checkpoint; confirm the checkpoint source is trusted.
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=self.dtype,
                device_map="auto",
                cache_dir=self.cache_dir,
                trust_remote_code=True,
            )
            self.pipe = self.model

        debug("model name", model_name, "on device", self.device, "dtype", self.dtype, self.model.dtype)
        # Replace the requested dtype ("auto") with the dtype the model actually reports.
        self.dtype = self.model.dtype

    def predict_one(self, x):
        """Load the image referenced by ``x`` and return the model's "answer" to the stored query."""
        image = self.load_image(x)
        response = self.model.query(image, self.query)
        return response["answer"]
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Pavlin Georgiev, Softel Labs
|
|
3
|
+
#
|
|
4
|
+
# This is a proprietary file and may not be copied,
|
|
5
|
+
# distributed, or modified without express permission
|
|
6
|
+
# from the owner. For licensing inquiries, please
|
|
7
|
+
# contact pavlin@softel.bg.
|
|
8
|
+
#
|
|
9
|
+
# 2024
|
|
10
|
+
#
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
|
|
14
|
+
from sciveo.tools.logger import *
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BPETokenizer:
    """Byte-level byte-pair-encoding (BPE) tokenizer.

    Text is handled as its UTF-8 bytes (token ids 0-255). Training repeatedly
    replaces the most frequent adjacent pair of tokens with a new token id,
    starting at ``initial_tokens``, until ``max_size - initial_tokens`` merges
    have been learned or no adjacent pairs remain.
    """

    def __init__(self, max_size):
        """Create an untrained tokenizer whose vocabulary may grow up to ``max_size`` ids."""
        self.initial_tokens = 256
        self.max_size = max_size
        self.max_merges = max_size - self.initial_tokens
        self.vocab = {}   # merged token id -> (left token, right token)
        self.merges = {}  # (left token, right token) -> merged token id, in learned order

    def encode(self, text):
        """Return the list of token ids for ``text`` using the learned merges."""
        tokens = list(text.encode("utf-8"))
        if not tokens:
            # Nothing to merge; also avoids dividing by zero in the ratio below.
            return tokens
        raw_length = len(tokens)
        # Merges must be replayed in the order they were learned (dict insertion order).
        for bigram, token_id in self.merges.items():
            self.merge(tokens, bigram, token_id)
        debug(f"encoded ratio {len(tokens) / raw_length:.2}x")
        return tokens

    def decode_token(self, token):
        """Expand one token id into the list of base tokens it stands for."""
        expanded = []
        # Explicit stack instead of recursion, so deeply nested merges cannot
        # hit the interpreter recursion limit.
        pending = [token]
        while pending:
            current = pending.pop()
            pair = self.vocab.get(current)
            if pair is None:
                expanded.append(current)
            else:
                # Push the right half first so the left half is expanded first.
                pending.append(pair[1])
                pending.append(pair[0])
        return expanded

    def decode(self, tokens):
        """Return the text for ``tokens``; invalid UTF-8 is replaced, not raised."""
        decoded = []
        for token in tokens:
            decoded += self.decode_token(token)
        return bytes(decoded).decode("utf-8", errors="replace")

    def train(self, text, debug_step=100):
        """Learn merges from ``text`` until ``max_merges`` is reached.

        Training stops early when the token sequence has no adjacent pairs
        left. Calling ``train`` again continues after the merges that are
        already learned instead of overwriting their ids.
        """
        tokens = list(text.encode("utf-8"))
        # Replay existing merges so new ids extend the current vocabulary
        # (a no-op for a fresh tokenizer).
        for bigram, merged_id in self.merges.items():
            self.merge(tokens, bigram, merged_id)
        token_id = self.initial_tokens + len(self.merges)
        debug("max_merges", self.max_merges)
        while len(self.merges) < self.max_merges:
            current_counts = self.counts(tokens)
            if not current_counts:
                # Fewer than two tokens remain: max() would raise on an empty dict.
                break
            bigram = max(current_counts, key=current_counts.get)
            self.merge(tokens, bigram, token_id)
            self.merges[bigram] = token_id
            self.vocab[token_id] = bigram
            token_id += 1
            if debug_step and len(self.merges) % debug_step == 0:
                debug("train", f"{len(self.merges)}/{self.max_merges}")

    def fit(self, x):
        """Alias of :meth:`train` with the default ``debug_step``."""
        return self.train(x)

    def counts(self, tokens):
        """Return a dict mapping each adjacent token pair to its number of occurrences."""
        result = {}
        for bigram in zip(tokens, tokens[1:]):
            result[bigram] = result.get(bigram, 0) + 1
        return result

    def merge(self, tokens, bigram, token_id):
        """Replace every non-overlapping ``bigram`` in ``tokens`` with ``token_id``, in place.

        Matches are taken greedily from left to right. The list is rebuilt in
        a single pass and written back with slice assignment, so callers
        holding a reference to ``tokens`` see the update.
        """
        left, right = bigram[0], bigram[1]
        merged = []
        total = len(tokens)
        i = 0
        while i < total:
            if i + 1 < total and tokens[i] == left and tokens[i + 1] == right:
                merged.append(token_id)
                i += 2
            else:
                merged.append(tokens[i])
                i += 1
        tokens[:] = merged
|
|
File without changes
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Pavlin Georgiev, Softel Labs
|
|
3
|
+
#
|
|
4
|
+
# This is a proprietary file and may not be copied,
|
|
5
|
+
# distributed, or modified without express permission
|
|
6
|
+
# from the owner. For licensing inquiries, please
|
|
7
|
+
# contact pavlin@softel.bg.
|
|
8
|
+
#
|
|
9
|
+
# 2024
|
|
10
|
+
#
|
|
11
|
+
|
|
12
|
+
import math
|
|
13
|
+
|
|
14
|
+
from sciveo.tools.logger import *
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Simple complexity evaluation
|
|
19
|
+
Evaluate against few well known complexities, could add more in self.list_complexity
|
|
20
|
+
"""
|
|
21
|
+
class ComplexityEval:
    """Classify an observed iteration count against well-known complexity classes.

    For an input of ``size`` N the reference values logN, N, NlogN, N^2 and
    N^3 are precomputed in ``self.list_complexity``; more classes can be
    added there as ``(name, value)`` tuples in ascending order.
    """

    def __init__(self, samples, printer=print):
        """Set up the reference table.

        Args:
            samples: the input size as an int, a sized container whose length
                is the size, or anything ``int()`` accepts.
            printer: callable used by :meth:`print`; defaults to the builtin.

        Raises:
            ValueError: if the resulting size is smaller than 1.
        """
        if isinstance(samples, int):
            self.size = samples
        elif isinstance(samples, (list, dict, tuple, set, frozenset, range)):
            self.size = len(samples)
        else:
            self.size = int(samples)

        if self.size < 1:
            # log2 is undefined here; fail with a clear message instead of
            # the bare "math domain error".
            raise ValueError(f"samples must describe at least one element, got size {self.size}")

        self.printer = printer

        log_size = math.log2(self.size)
        self.list_complexity = [
            ("logN", round(log_size, 2)),
            ("N", self.size),
            ("NlogN", round(self.size * log_size)),
            ("N^2", self.size ** 2),
            ("N^3", self.size ** 3),
        ]

        self.complexity_str = " ".join(f"{name}={value}" for name, value in self.list_complexity)

    def calc_power(self, iterations):
        """Return p such that ``size ** p == iterations``.

        Degenerate inputs return the mathematical limit instead of raising:
        zero iterations gives ``-inf``, and size 1 (whose logarithm is zero)
        gives ``0.0`` or a signed infinity.
        """
        if iterations == 0:
            return -math.inf
        log_iterations = math.log(iterations)
        log_size = math.log(self.size)
        if log_size == 0:
            if log_iterations == 0:
                return 0.0
            return math.copysign(math.inf, log_iterations)
        return log_iterations / log_size

    def evaluate(self, iterations):
        """Return the ``(name, value)`` entry that best matches ``iterations``.

        The first reference value not smaller than ``iterations`` is found,
        then that entry or the previous one is chosen, whichever is closer
        in exponent (the split point is the midpoint of their two powers).
        Counts above every reference value map to the last entry.
        """
        this_power = self.calc_power(iterations)

        for i, entry in enumerate(self.list_complexity):
            if iterations > entry[1]:
                continue
            if i == 0:
                return entry
            previous = self.list_complexity[i - 1]
            midpoint = (self.calc_power(entry[1]) + self.calc_power(previous[1])) / 2
            return previous if this_power <= midpoint else entry

        return self.list_complexity[-1]

    def __call__(self, iterations):
        """Shortcut for :meth:`evaluate`."""
        return self.evaluate(iterations)

    def print(self, iterations):
        """Report the evaluation of ``iterations`` through ``self.printer``."""
        this_complexity = self.evaluate(iterations)
        this_power = self.calc_power(iterations)
        self.printer("size", self.size, "iterations", iterations, f"(N^{this_power:.2f})({this_complexity[1]})", f"[{self.complexity_str}]")
        self.printer(f"O(N) = {this_complexity[0]}")
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sciveo
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.40
|
|
4
4
|
Description-Content-Type: text/markdown
|
|
5
5
|
Provides-Extra: mon
|
|
6
6
|
Provides-Extra: net
|
|
7
7
|
Provides-Extra: media
|
|
8
8
|
Provides-Extra: media-server
|
|
9
9
|
Provides-Extra: media-ml
|
|
10
|
+
Provides-Extra: media-ml-all
|
|
10
11
|
Provides-Extra: all
|
|
11
12
|
Provides-Extra: media-all
|
|
12
13
|
|
|
@@ -100,6 +100,8 @@ sciveo/ml/images/tools.py
|
|
|
100
100
|
sciveo/ml/images/transformers.py
|
|
101
101
|
sciveo/ml/nlp/__init__.py
|
|
102
102
|
sciveo/ml/nlp/embeddings.py
|
|
103
|
+
sciveo/ml/nlp/tokenizers/__init__.py
|
|
104
|
+
sciveo/ml/nlp/tokenizers/bpe.py
|
|
103
105
|
sciveo/ml/video/__init__.py
|
|
104
106
|
sciveo/ml/video/description.py
|
|
105
107
|
sciveo/monitoring/__init__.py
|
|
@@ -112,6 +114,7 @@ sciveo/network/tools.py
|
|
|
112
114
|
sciveo/tools/__init__.py
|
|
113
115
|
sciveo/tools/array.py
|
|
114
116
|
sciveo/tools/common.py
|
|
117
|
+
sciveo/tools/complexity.py
|
|
115
118
|
sciveo/tools/compress.py
|
|
116
119
|
sciveo/tools/configuration.py
|
|
117
120
|
sciveo/tools/crypto.py
|
|
@@ -129,9 +132,11 @@ sciveo/tools/timers.py
|
|
|
129
132
|
sciveo/tools/aws/__init__.py
|
|
130
133
|
sciveo/tools/aws/priority_queue.py
|
|
131
134
|
sciveo/tools/aws/s3.py
|
|
135
|
+
test/test_complexity.py
|
|
132
136
|
test/test_compress.py
|
|
133
137
|
test/test_configuration.py
|
|
134
138
|
test/test_crypto.py
|
|
135
139
|
test/test_monitoring.py
|
|
136
140
|
test/test_runner.py
|
|
137
|
-
test/test_sampling.py
|
|
141
|
+
test/test_sampling.py
|
|
142
|
+
test/test_tokenizers.py
|
|
@@ -30,7 +30,11 @@ extras_require = {
|
|
|
30
30
|
'tensorflow>=0.0.0', 'keras>=0.0.0',
|
|
31
31
|
'torch>=0.0.0', 'torchvision>=0.0.0',
|
|
32
32
|
'diffusers>=0.0.0', 'transformers>=0.0.0', 'accelerate>=0.0.0', 'annoy>=0.0.0',
|
|
33
|
-
'ultralytics>=0.0.0'
|
|
33
|
+
'ultralytics>=0.0.0',
|
|
34
|
+
],
|
|
35
|
+
'media-ml-all': [
|
|
36
|
+
'pyvips>=0.0.0',
|
|
37
|
+
'einops>=0.0.0'
|
|
34
38
|
]
|
|
35
39
|
}
|
|
36
40
|
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Pavlin Georgiev, Softel Labs
|
|
3
|
+
#
|
|
4
|
+
# This is a proprietary file and may not be copied,
|
|
5
|
+
# distributed, or modified without express permission
|
|
6
|
+
# from the owner. For licensing inquiries, please
|
|
7
|
+
# contact pavlin@softel.bg.
|
|
8
|
+
#
|
|
9
|
+
# 2024
|
|
10
|
+
#
|
|
11
|
+
|
|
12
|
+
import math
|
|
13
|
+
import unittest
|
|
14
|
+
|
|
15
|
+
from sciveo.tools.complexity import *
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestComplexity(unittest.TestCase):
    """Checks that ComplexityEval maps iteration counts to the expected class."""

    def _do_test(self, ce):
        # All expectations assume an evaluator built for an input size of 1024.
        self.assertEqual(ce(2e3)[0], "N")
        self.assertEqual(ce(1e7)[0], "N^2")
        self.assertEqual(ce(2e4)[0], "NlogN")

    def test_1(self):
        """Size given directly as an int."""
        self._do_test(ComplexityEval(1024))

    def test_input(self):
        """Size given as an int and as a list of that length must agree."""
        for samples in (1024, [1]*1024):
            self._do_test(ComplexityEval(samples))


if __name__ == '__main__':
    unittest.main()
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Pavlin Georgiev, Softel Labs
|
|
3
|
+
#
|
|
4
|
+
# This is a proprietary file and may not be copied,
|
|
5
|
+
# distributed, or modified without express permission
|
|
6
|
+
# from the owner. For licensing inquiries, please
|
|
7
|
+
# contact pavlin@softel.bg.
|
|
8
|
+
#
|
|
9
|
+
# 2024
|
|
10
|
+
#
|
|
11
|
+
|
|
12
|
+
import math
|
|
13
|
+
import unittest
|
|
14
|
+
|
|
15
|
+
from sciveo.ml.nlp.tokenizers.bpe import *
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestTokenizers(unittest.TestCase):
    """Round-trip checks for the byte-level BPE tokenizer."""

    def test_BPE(self):
        """Text must survive encode -> decode unchanged after training on a mixed-script corpus."""
        corpus = (
            "節樂,《漢語大詞典》一則:「《史記.樂書》:凡作樂者,所以節槳。張守義正義:音洛,言不樂至荒淫也, 網站有中、英文版本,也有繁、簡體版,可通過每頁左上角的連結隨時調整 Unicode! 🅤🅝🅘🅒🅞🅓🅔‽ 🇺🇳🇮🇨🇴🇩🇪! 😄 The very name strikes fear and awe into the hearts of programmers worldwide. We all know we ought to “support Unicode” in our software (whatever that means—like using wchar_t for all the strings, right?)"
            "Using a row in the above table to encode a code point less than 'First code point' (thus using more bytes than necessary) is termed an overlong encoding. These are a security problem because they allow the same code point to be encoded in multiple ways. Overlong encodings (of ../ for example) have been used to bypass security validations in high-profile products including Microsoft's IIS web server[14] and Apache's Tomcat servlet container.[15] Overlong encodings should therefore be considered an error and never decoded. Modified UTF-8 allows an overlong encoding of U+0000."
        )

        tokenizer = BPETokenizer(max_size=512)
        tokenizer.train(corpus)

        # One sample in a script that is rare in the corpus, one in plain ASCII.
        for sample in ("你好世界,美好的一天", "hello world and testing"):
            self.assertEqual(tokenizer.decode(tokenizer.encode(sample)), sample)


if __name__ == '__main__':
    unittest.main()
|
sciveo-0.1.38/sciveo/version.py
DELETED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/audio/audio_extractor_process.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/sci/time_series/__init__.py
RENAMED
|
File without changes
|
{sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/sci/time_series/predictor.py
RENAMED
|
File without changes
|
{sciveo-0.1.38 → sciveo-0.1.40}/sciveo/media/pipelines/processors/sci/time_series/trainer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|