sciveo 0.1.54__tar.gz → 0.1.56__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. {sciveo-0.1.54 → sciveo-0.1.56}/PKG-INFO +1 -1
  2. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/tools/nvr.py +41 -8
  3. sciveo-0.1.56/sciveo/ml/evaluation/markdown.py +227 -0
  4. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/monitoring/monitor.py +1 -1
  5. sciveo-0.1.56/sciveo/version.py +2 -0
  6. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo.egg-info/PKG-INFO +1 -1
  7. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo.egg-info/SOURCES.txt +2 -1
  8. sciveo-0.1.56/test/test_eval_markdown.py +41 -0
  9. sciveo-0.1.54/sciveo/ml/images/segmentation.py +0 -304
  10. sciveo-0.1.54/sciveo/version.py +0 -2
  11. {sciveo-0.1.54 → sciveo-0.1.56}/README.md +0 -0
  12. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/__init__.py +0 -0
  13. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/api/__init__.py +0 -0
  14. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/api/base.py +0 -0
  15. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/api/predictors.py +0 -0
  16. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/api/server.py +0 -0
  17. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/api/upload.py +0 -0
  18. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/cli.py +0 -0
  19. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/common/__init__.py +0 -0
  20. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/common/configuration.py +0 -0
  21. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/common/model.py +0 -0
  22. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/common/optimizers.py +0 -0
  23. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/common/sampling.py +0 -0
  24. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/content/__init__.py +0 -0
  25. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/content/dataset.py +0 -0
  26. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/content/experiment.py +0 -0
  27. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/content/project.py +0 -0
  28. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/content/runner.py +0 -0
  29. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/__init__.py +0 -0
  30. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/__init__.py +0 -0
  31. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/base.py +0 -0
  32. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/encoders/__init__.py +0 -0
  33. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/encoders/base.py +0 -0
  34. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/encoders/normalizer.py +0 -0
  35. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/nlp/__init__.py +0 -0
  36. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/nlp/search.py +0 -0
  37. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/time_series/__init__.py +0 -0
  38. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/time_series/dataset.py +0 -0
  39. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/time_series/predictor.py +0 -0
  40. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/time_series/trainer.py +0 -0
  41. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/ml/time_series/window_generator.py +0 -0
  42. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/__init__.py +0 -0
  43. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/base.py +0 -0
  44. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/job_daemon.py +0 -0
  45. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/layouts/__init__.py +0 -0
  46. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/layouts/base.py +0 -0
  47. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/pipeline.py +0 -0
  48. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/postprocessors/__init__.py +0 -0
  49. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/postprocessors/base.py +0 -0
  50. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/postprocessors/default.py +0 -0
  51. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/__init__.py +0 -0
  52. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/audio/__init__.py +0 -0
  53. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/audio/audio.py +0 -0
  54. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/audio/audio_extractor_process.py +0 -0
  55. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/aws.py +0 -0
  56. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/base.py +0 -0
  57. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/file/__init__.py +0 -0
  58. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/file/archive.py +0 -0
  59. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/__init__.py +0 -0
  60. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/album.py +0 -0
  61. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/album_in_image.py +0 -0
  62. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/depth_esimation.py +0 -0
  63. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/embeddings.py +0 -0
  64. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/filters.py +0 -0
  65. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/generators.py +0 -0
  66. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/histogram.py +0 -0
  67. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/mask.py +0 -0
  68. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/object_detection.py +0 -0
  69. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/resize.py +0 -0
  70. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/segmentation.py +0 -0
  71. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/image/watermark.py +0 -0
  72. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/media_info.py +0 -0
  73. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/nlp/__init__.py +0 -0
  74. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/nlp/address.py +0 -0
  75. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/qr.py +0 -0
  76. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/sci/__init__.py +0 -0
  77. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/sci/base.py +0 -0
  78. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/sci/dataset.py +0 -0
  79. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/sci/time_series/__init__.py +0 -0
  80. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/sci/time_series/predictor.py +0 -0
  81. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/sci/time_series/trainer.py +0 -0
  82. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/tpu_base.py +0 -0
  83. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/video/__init__.py +0 -0
  84. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/video/generators.py +0 -0
  85. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/video/motion_detection.py +0 -0
  86. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/video/resize.py +0 -0
  87. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/video/video_album.py +0 -0
  88. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/video/video_frames.py +0 -0
  89. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/processors/video/video_resample.py +0 -0
  90. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/queues.py +0 -0
  91. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/server.py +0 -0
  92. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/web/__init__.py +0 -0
  93. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/pipelines/web/server.py +0 -0
  94. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/tools/__init__.py +0 -0
  95. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/media/tools/video_interactive.py +0 -0
  96. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/__init__.py +0 -0
  97. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/base.py +0 -0
  98. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/dataset/__init__.py +0 -0
  99. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/dataset/object_detection.py +0 -0
  100. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/evaluation/__init__.py +0 -0
  101. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/evaluation/object_detection.py +0 -0
  102. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/images/__init__.py +0 -0
  103. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/images/base.py +0 -0
  104. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/images/description.py +0 -0
  105. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/images/embeddings.py +0 -0
  106. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/images/object_detection.py +0 -0
  107. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/images/tools.py +0 -0
  108. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/images/transformers.py +0 -0
  109. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/nlp/__init__.py +0 -0
  110. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/nlp/embeddings.py +0 -0
  111. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/nlp/tokenizers/__init__.py +0 -0
  112. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/nlp/tokenizers/bpe.py +0 -0
  113. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/video/__init__.py +0 -0
  114. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/ml/video/description.py +0 -0
  115. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/monitoring/__init__.py +0 -0
  116. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/monitoring/start.py +0 -0
  117. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/monitoring/watchdog/__init__.py +0 -0
  118. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/monitoring/watchdog/memory.py +0 -0
  119. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/network/__init__.py +0 -0
  120. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/network/camera.py +0 -0
  121. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/network/sniffer.py +0 -0
  122. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/network/tools.py +0 -0
  123. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/__init__.py +0 -0
  124. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/array.py +0 -0
  125. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/aws/__init__.py +0 -0
  126. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/aws/priority_queue.py +0 -0
  127. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/aws/s3.py +0 -0
  128. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/common.py +0 -0
  129. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/complexity.py +0 -0
  130. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/compress.py +0 -0
  131. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/configuration.py +0 -0
  132. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/crypto.py +0 -0
  133. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/daemon.py +0 -0
  134. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/formating.py +0 -0
  135. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/hardware.py +0 -0
  136. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/http.py +0 -0
  137. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/logger.py +0 -0
  138. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/os.py +0 -0
  139. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/queue.py +0 -0
  140. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/random.py +0 -0
  141. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/remote.py +0 -0
  142. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/simple_counter.py +0 -0
  143. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/synchronized.py +0 -0
  144. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo/tools/timers.py +0 -0
  145. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo.egg-info/dependency_links.txt +0 -0
  146. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo.egg-info/entry_points.txt +0 -0
  147. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo.egg-info/requires.txt +0 -0
  148. {sciveo-0.1.54 → sciveo-0.1.56}/sciveo.egg-info/top_level.txt +0 -0
  149. {sciveo-0.1.54 → sciveo-0.1.56}/setup.cfg +0 -0
  150. {sciveo-0.1.54 → sciveo-0.1.56}/setup.py +0 -0
  151. {sciveo-0.1.54 → sciveo-0.1.56}/test/test_complexity.py +0 -0
  152. {sciveo-0.1.54 → sciveo-0.1.56}/test/test_compress.py +0 -0
  153. {sciveo-0.1.54 → sciveo-0.1.56}/test/test_configuration.py +0 -0
  154. {sciveo-0.1.54 → sciveo-0.1.56}/test/test_crypto.py +0 -0
  155. {sciveo-0.1.54 → sciveo-0.1.56}/test/test_ml_datasets.py +0 -0
  156. {sciveo-0.1.54 → sciveo-0.1.56}/test/test_monitoring.py +0 -0
  157. {sciveo-0.1.54 → sciveo-0.1.56}/test/test_runner.py +0 -0
  158. {sciveo-0.1.54 → sciveo-0.1.56}/test/test_sampling.py +0 -0
  159. {sciveo-0.1.54 → sciveo-0.1.56}/test/test_tokenizers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sciveo
3
- Version: 0.1.54
3
+ Version: 0.1.56
4
4
  Description-Content-Type: text/markdown
5
5
  Provides-Extra: mon
6
6
  Provides-Extra: net
@@ -25,12 +25,13 @@ from sciveo.tools.simple_counter import RunCounter
25
25
 
26
26
 
27
27
  class VideoCameraCaptureDaemon(DaemonBase):
28
- def __init__(self, cam_id, url, dst_path, max_video_len=60, transport="tcp"):
28
+ def __init__(self, cam_id, url, dst_path, max_video_len=60, transport="tcp", max_timeout=30):
29
29
  super().__init__()
30
30
  self.cam_id = cam_id
31
31
  self.url = url
32
32
  self.dst_path = dst_path
33
33
  self.max_video_len = max_video_len
34
+ self.max_timeout = max_timeout
34
35
  self.transport = transport
35
36
  self.cmd = [
36
37
  "ffmpeg",
@@ -42,10 +43,6 @@ class VideoCameraCaptureDaemon(DaemonBase):
42
43
  "-segment_time", f"{self.max_video_len}",
43
44
  "-reset_timestamps", "1",
44
45
  "-strftime", "1",
45
- "-reconnect", "1",
46
- "-reconnect_at_eof", "1",
47
- "-reconnect_streamed", "1",
48
- "-reconnect_delay_max", "5",
49
46
  f"{self.dst_path}/{self.cam_id}___%Y-%m-%d___%H-%M-%S.mp4"
50
47
  ]
51
48
 
@@ -61,17 +58,53 @@ class VideoCameraCaptureDaemon(DaemonBase):
61
58
 
62
59
  def loop(self):
63
60
  info("start", self.cam_id)
64
- while(True):
61
+
62
+ while True:
65
63
  self.clear()
66
64
 
67
- p = sp.Popen(self.cmd, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
68
- p.wait()
65
+ try:
66
+ last_known_file = None
67
+ last_mod_time = None
68
+ last_progress_time = time.time()
69
+
70
+ p = sp.Popen(self.cmd, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
71
+
72
+ while True:
73
+ time.sleep(5)
74
+
75
+ if p.poll() is not None:
76
+ break
77
+
78
+ current_files = [
79
+ f for f in os.listdir(self.dst_path)
80
+ if f.startswith(f"{self.cam_id}___") and f.endswith(".mp4")
81
+ ]
82
+ if current_files:
83
+ current_files.sort(key=lambda x: os.path.getmtime(os.path.join(self.dst_path, x)), reverse=True)
84
+ newest_file = os.path.join(self.dst_path, current_files[0])
85
+
86
+ mod_time = os.path.getmtime(newest_file)
87
+ if newest_file == last_known_file:
88
+ if mod_time > last_mod_time:
89
+ last_progress_time = time.time()
90
+ elif time.time() - last_progress_time > self.max_timeout:
91
+ warning(self.cam_id, f"No file update for over {self.max_timeout} seconds, killing ffmpeg...")
92
+ p.kill()
93
+ break
94
+ else:
95
+ last_known_file = newest_file
96
+ last_mod_time = mod_time
97
+ last_progress_time = time.time()
98
+
99
+ except Exception as e:
100
+ exception(e)
69
101
 
70
102
  self.clear()
71
103
  warning(self.cam_id, "streaming interrupted, wait to retry...")
72
104
  time.sleep(5)
73
105
 
74
106
 
107
+
75
108
  class VideoRecorder:
76
109
  def __init__(self, path_configuration):
77
110
  with open(path_configuration, 'r') as fp:
@@ -0,0 +1,227 @@
1
+ #
2
+ # Stanislav Georgiev, Softel Labs
3
+ #
4
+ # This is a proprietary file and may not be copied,
5
+ # distributed, or modified without express permission
6
+ # from the owner. For licensing inquiries, please
7
+ # contact s.georgiev@softel.bg.
8
+ #
9
+ # 2025
10
+ #
11
+
12
+ import re
13
+ import difflib
14
+ from collections import defaultdict
15
+
16
+
17
class EvalMarkdownSimple:
  """
  Line-by-line comparison of a labeled Markdown text with a predicted one.

  Result categories stored in ``self.results``:
    EM - exact matches (case-insensitive), as (true, predicted)
    PM - partial matches, as (true, predicted, similarity)
    FN - true lines with no sufficiently similar predicted line
    FP - predicted lines not selected as a match for any true line
    RI - lists of predicted indices when matched lines appear out of order
    H  - FP lines that are also not similar to any true line
    FE - (true, predicted) pairs that differ only in leading/trailing #, * or `
  """

  def __init__(self, md_true: str, md_predicted: str, similarity_threshold=0.8):
    """
    :param md_true: The labeled Markdown text.
    :param md_predicted: The predicted Markdown text.
    :param similarity_threshold: Minimal SequenceMatcher ratio for a match.
    """
    self.md_true = md_true.split("\n")
    self.md_predicted = md_predicted.split("\n")
    self.similarity_threshold = similarity_threshold
    self.results = {"EM": [], "PM": [], "FN": [], "FP": [], "RI": [], "H": [], "FE": []}

  def _find_best_match(self, text, candidates):
    """
    Finds the candidate most similar to text (case-insensitive comparison).

    :param text: The text to match.
    :param candidates: The list of text blocks to search in.
    :return: (best_match, similarity_score) or (None, 0) if the best score
             is below the similarity threshold.
    """
    best_match = None
    best_score = 0
    text_lower = text.lower()

    for candidate in candidates:
      score = difflib.SequenceMatcher(None, text_lower, candidate.lower()).ratio()
      if score > best_score:
        best_score = score
        best_match = candidate

    if best_score >= self.similarity_threshold:
      return best_match, best_score
    return None, 0

  def _check_formatting_errors(self, original, predicted):
    """
    Checks for incorrect Markdown formatting in predicted text.

    A formatting error means the two lines differ, but become equal once the
    leading/trailing Markdown markers (#, *, `) and whitespace are removed.

    :param original: The manually labeled Markdown text.
    :param predicted: The predicted Markdown text.
    :return: True if formatting errors exist, False otherwise.
    """
    if original == predicted:
      # Identical lines cannot contain a formatting difference.
      return False
    return original.strip("#*`").strip() == predicted.strip("#*`").strip()

  def evaluate(self):
    """
    Evaluates the predicted Markdown against the labeled one.

    Blank lines are ignored on both sides. Results from a previous call are
    discarded, so calling this method repeatedly does not duplicate entries.

    :return: The results dictionary (see class docstring).
    """
    for bucket in self.results.values():
      bucket.clear()

    # First occurrence wins, so repeated predicted lines keep a stable index.
    predicted_idx_map = {}
    for idx, block in enumerate(self.md_predicted):
      predicted_idx_map.setdefault(block, idx)

    matched_predicted_blocks = set()
    matched_indices = []  # predicted positions, collected in true-text order

    for true_text in self.md_true:
      if not true_text.strip():
        continue

      best_match, score = self._find_best_match(true_text, self.md_predicted)
      if best_match is None:
        self.results["FN"].append(true_text)
        continue

      matched_predicted_blocks.add(best_match)
      matched_indices.append(predicted_idx_map[best_match])

      if score == 1.0:
        self.results["EM"].append((true_text, best_match))
      else:
        self.results["PM"].append((true_text, best_match, score))

      if self._check_formatting_errors(true_text, best_match):
        self.results["FE"].append((true_text, best_match))

    # False Positives (extra predicted blocks that don't match labeled Markdown)
    for pred_text in self.md_predicted:
      if pred_text.strip() and pred_text not in matched_predicted_blocks:
        self.results["FP"].append(pred_text)

    # Hallucinations (false positives that resemble nothing in the labeled text)
    for pred_text in self.results["FP"]:
      best_match, _ = self._find_best_match(pred_text, self.md_true)
      if best_match is None:
        self.results["H"].append(pred_text)

    # Order issues: walking the true text top-down, the matched predicted
    # positions must be non-decreasing.
    if matched_indices != sorted(matched_indices):
      self.results["RI"].append(matched_indices)

    return self.results

  def score(self):
    """
    Computes precision, recall and F1 where each partial match contributes
    its similarity score instead of a full hit.

    :return: dict with keys "Precision", "Recall" and "F1 Score".
    """
    TP = len(self.results["EM"])
    PM_weighted = sum(score for _, _, score in self.results["PM"])
    FN = len(self.results["FN"])
    FP = len(self.results["FP"])

    hits = TP + PM_weighted
    precision = hits / (hits + FP) if (hits + FP) > 0 else 0
    recall = hits / (hits + FN) if (hits + FN) > 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return {"Precision": precision, "Recall": recall, "F1 Score": f1_score}
115
+
116
+
117
class EvalMarkdown:
  """
  Section-wise comparison of a labeled Markdown text with a predicted one.

  Both texts are lower-cased and split into sections by their headings; lines
  in front of the first heading belong to the "INTRO" section. Within each
  section the predicted lines are classified as exact matches (EM), partial
  matches (PM) or false positives (FP); unmatched true lines are false
  negatives (FN).
  """

  # A Markdown ATX heading: 1-6 '#' characters, whitespace, then the title.
  _HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)")

  def __init__(self, md_true, md_predicted, similarity_threshold=0.8):
    """
    :param md_true: The labeled Markdown text.
    :param md_predicted: The predicted Markdown text.
    :param similarity_threshold: A partial match is accepted only when its
                                 SequenceMatcher ratio is strictly above this.
    """
    self.md_true = self._parse_markdown(md_true.lower())
    self.md_predicted = self._parse_markdown(md_predicted.lower())
    self.similarity_threshold = similarity_threshold
    self.results = defaultdict(lambda: {"EM": [], "PM": [], "FN": [], "FP": []})

  def _parse_markdown(self, markdown_text):
    """
    Parses Markdown into a dictionary of sections where key = heading title,
    value = list of stripped, non-blank lines belonging to that section.
    """
    sections = defaultdict(list)
    current_section = "INTRO"  # Default section if no heading appears

    for line in markdown_text.split("\n"):
      heading_match = self._HEADING_RE.match(line)
      if heading_match:
        current_section = heading_match.group(2).strip()
        continue

      stripped = line.strip()
      if stripped:
        sections[current_section].append(stripped)

    return sections

  def _find_best_match(self, text, true_texts):
    """
    Finds the best match for a given text within a list of true texts.
    Returns (best_match_text, similarity_score), or (None, 0) when nothing
    has a similarity above zero.
    """
    best_match, best_score = None, 0

    for true_text in true_texts:
      score = difflib.SequenceMatcher(None, text, true_text).ratio()
      if score > best_score:
        best_match, best_score = true_text, score

    return best_match, best_score

  def evaluate(self):
    """
    Classifies the lines of every section into EM / PM / FN / FP.

    A true line that has already been matched is not offered again as a
    partial-match candidate, so one true line cannot absorb several predicted
    lines. Results from a previous call are discarded.

    :return: dict mapping section title to its result lists.
    """
    self.results.clear()

    # True sections first, then predicted-only ones, for a stable order.
    all_sections = list(dict.fromkeys([*self.md_true, *self.md_predicted]))

    for section in all_sections:
      true_texts = self.md_true.get(section, [])
      pred_texts = self.md_predicted.get(section, [])
      true_lookup = set(true_texts)
      section_result = self.results[section]

      matched_true = set()
      matched_pred = set()

      # Exact matches
      for pred_text in pred_texts:
        if pred_text in true_lookup:
          section_result["EM"].append((pred_text, pred_text))
          matched_true.add(pred_text)
          matched_pred.add(pred_text)

      # Partial matches, only against true lines that are still unmatched
      for pred_text in pred_texts:
        if pred_text in matched_pred:
          continue
        candidates = [t for t in true_texts if t not in matched_true]
        best_match, score = self._find_best_match(pred_text, candidates)
        if best_match is not None and score > self.similarity_threshold:
          section_result["PM"].append((best_match, pred_text, score))
          matched_true.add(best_match)
          matched_pred.add(pred_text)

      # False negatives (missed text from ground truth)
      section_result["FN"].extend(t for t in true_texts if t not in matched_true)

      # False positives (extra predicted text)
      section_result["FP"].extend(p for p in pred_texts if p not in matched_pred)

    return self.results

  @staticmethod
  def _prf(tp, pm_weighted, fn, fp):
    """Returns precision / recall / F1 for the given (weighted) counts."""
    hits = tp + pm_weighted
    precision = hits / (hits + fp) if (hits + fp) > 0 else 0
    recall = hits / (hits + fn) if (hits + fn) > 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return {"Precision": precision, "Recall": recall, "F1 Score": f1_score}

  def score(self):
    """
    Computes section-wise and global similarity scores.

    Partial matches contribute their similarity score instead of a full hit.

    :return: {"Sections": {title: scores}, "Global": scores}, where scores is
             a dict with keys "Precision", "Recall" and "F1 Score".
    """
    section_scores = {}
    global_TP, global_PM, global_FN, global_FP = 0, 0, 0, 0

    for section, result in self.results.items():
      TP = len(result["EM"])
      PM_weighted = sum(score for _, _, score in result["PM"])
      FN = len(result["FN"])
      FP = len(result["FP"])

      section_scores[section] = self._prf(TP, PM_weighted, FN, FP)

      global_TP += TP
      global_PM += PM_weighted
      global_FN += FN
      global_FP += FP

    # Global precision/recall across all sections
    return {
      "Sections": section_scores,
      "Global": self._prf(global_TP, global_PM, global_FN, global_FP),
    }
@@ -169,7 +169,7 @@ class BaseMonitor(DaemonBase):
169
169
 
170
170
  lines = result.stdout.strip().split('\n')
171
171
  self.data["GPU"] = {
172
- "raw_lines": lines
172
+ "raw_lines": lines[:7] # TODO: FIX this! Due to aws timestream constraints.
173
173
  }
174
174
  except Exception as e:
175
175
  pass
@@ -0,0 +1,2 @@
1
+
2
+ __version__ = '0.1.56'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sciveo
3
- Version: 0.1.54
3
+ Version: 0.1.56
4
4
  Description-Content-Type: text/markdown
5
5
  Provides-Extra: mon
6
6
  Provides-Extra: net
@@ -97,13 +97,13 @@ sciveo/ml/base.py
97
97
  sciveo/ml/dataset/__init__.py
98
98
  sciveo/ml/dataset/object_detection.py
99
99
  sciveo/ml/evaluation/__init__.py
100
+ sciveo/ml/evaluation/markdown.py
100
101
  sciveo/ml/evaluation/object_detection.py
101
102
  sciveo/ml/images/__init__.py
102
103
  sciveo/ml/images/base.py
103
104
  sciveo/ml/images/description.py
104
105
  sciveo/ml/images/embeddings.py
105
106
  sciveo/ml/images/object_detection.py
106
- sciveo/ml/images/segmentation.py
107
107
  sciveo/ml/images/tools.py
108
108
  sciveo/ml/images/transformers.py
109
109
  sciveo/ml/nlp/__init__.py
@@ -147,6 +147,7 @@ test/test_complexity.py
147
147
  test/test_compress.py
148
148
  test/test_configuration.py
149
149
  test/test_crypto.py
150
+ test/test_eval_markdown.py
150
151
  test/test_ml_datasets.py
151
152
  test/test_monitoring.py
152
153
  test/test_runner.py
@@ -0,0 +1,41 @@
1
+ #
2
+ # Stanislav Georgiev, Softel Labs
3
+ #
4
+ # This is a proprietary file and may not be copied,
5
+ # distributed, or modified without express permission
6
+ # from the owner. For licensing inquiries, please
7
+ # contact s.georgiev@softel.bg.
8
+ #
9
+ # 2025
10
+ #
11
+
12
+ import math
13
+ import unittest
14
+
15
+ from sciveo.tools.logger import *
16
+ from sciveo.ml.evaluation.markdown import *
17
+
18
+
19
class TestEvalMarkdown(unittest.TestCase):
  """Smoke test for the section-wise Markdown evaluation."""

  def test_1(self):
    # Every line is wrapped in double quotes, so no line starts with '#':
    # the parser sees no headings and files everything under "INTRO".
    md_true = """
    "# Breaking News"
    "A major fire broke out in the city center."
    "Authorities are investigating the cause."
    """

    md_predicted = """
    "# BREAKING NEWS",
    "A major fire broke out in city center.",
    "Authorities investigate the cause.",
    "Stay tuned for updates."
    """

    em = EvalMarkdown(md_true, md_predicted)
    results = em.evaluate()
    info(results)
    scores = em.score()
    info("Score", scores)

    # The extra predicted line resembles no labeled line, so it must be
    # reported as a false positive (texts are lower-cased by EvalMarkdown).
    self.assertIn('"stay tuned for updates."', results["INTRO"]["FP"])

    # All scores are ratios and must stay within [0, 1].
    for name in ("Precision", "Recall", "F1 Score"):
      self.assertGreaterEqual(scores["Global"][name], 0.0)
      self.assertLessEqual(scores["Global"][name], 1.0)


if __name__ == '__main__':
  unittest.main()