sciveo 0.1.54__tar.gz → 0.1.55__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. {sciveo-0.1.54 → sciveo-0.1.55}/PKG-INFO +1 -1
  2. sciveo-0.1.55/sciveo/ml/evaluation/markdown.py +227 -0
  3. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/monitoring/monitor.py +1 -1
  4. sciveo-0.1.55/sciveo/version.py +2 -0
  5. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo.egg-info/PKG-INFO +1 -1
  6. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo.egg-info/SOURCES.txt +2 -1
  7. sciveo-0.1.55/test/test_eval_markdown.py +41 -0
  8. sciveo-0.1.54/sciveo/ml/images/segmentation.py +0 -304
  9. sciveo-0.1.54/sciveo/version.py +0 -2
  10. {sciveo-0.1.54 → sciveo-0.1.55}/README.md +0 -0
  11. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/__init__.py +0 -0
  12. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/api/__init__.py +0 -0
  13. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/api/base.py +0 -0
  14. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/api/predictors.py +0 -0
  15. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/api/server.py +0 -0
  16. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/api/upload.py +0 -0
  17. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/cli.py +0 -0
  18. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/common/__init__.py +0 -0
  19. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/common/configuration.py +0 -0
  20. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/common/model.py +0 -0
  21. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/common/optimizers.py +0 -0
  22. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/common/sampling.py +0 -0
  23. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/content/__init__.py +0 -0
  24. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/content/dataset.py +0 -0
  25. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/content/experiment.py +0 -0
  26. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/content/project.py +0 -0
  27. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/content/runner.py +0 -0
  28. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/__init__.py +0 -0
  29. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/__init__.py +0 -0
  30. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/base.py +0 -0
  31. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/encoders/__init__.py +0 -0
  32. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/encoders/base.py +0 -0
  33. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/encoders/normalizer.py +0 -0
  34. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/nlp/__init__.py +0 -0
  35. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/nlp/search.py +0 -0
  36. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/time_series/__init__.py +0 -0
  37. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/time_series/dataset.py +0 -0
  38. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/time_series/predictor.py +0 -0
  39. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/time_series/trainer.py +0 -0
  40. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/ml/time_series/window_generator.py +0 -0
  41. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/__init__.py +0 -0
  42. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/base.py +0 -0
  43. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/job_daemon.py +0 -0
  44. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/layouts/__init__.py +0 -0
  45. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/layouts/base.py +0 -0
  46. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/pipeline.py +0 -0
  47. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/postprocessors/__init__.py +0 -0
  48. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/postprocessors/base.py +0 -0
  49. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/postprocessors/default.py +0 -0
  50. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/__init__.py +0 -0
  51. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/audio/__init__.py +0 -0
  52. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/audio/audio.py +0 -0
  53. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/audio/audio_extractor_process.py +0 -0
  54. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/aws.py +0 -0
  55. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/base.py +0 -0
  56. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/file/__init__.py +0 -0
  57. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/file/archive.py +0 -0
  58. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/__init__.py +0 -0
  59. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/album.py +0 -0
  60. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/album_in_image.py +0 -0
  61. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/depth_esimation.py +0 -0
  62. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/embeddings.py +0 -0
  63. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/filters.py +0 -0
  64. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/generators.py +0 -0
  65. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/histogram.py +0 -0
  66. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/mask.py +0 -0
  67. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/object_detection.py +0 -0
  68. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/resize.py +0 -0
  69. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/segmentation.py +0 -0
  70. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/image/watermark.py +0 -0
  71. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/media_info.py +0 -0
  72. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/nlp/__init__.py +0 -0
  73. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/nlp/address.py +0 -0
  74. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/qr.py +0 -0
  75. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/sci/__init__.py +0 -0
  76. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/sci/base.py +0 -0
  77. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/sci/dataset.py +0 -0
  78. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/sci/time_series/__init__.py +0 -0
  79. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/sci/time_series/predictor.py +0 -0
  80. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/sci/time_series/trainer.py +0 -0
  81. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/tpu_base.py +0 -0
  82. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/video/__init__.py +0 -0
  83. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/video/generators.py +0 -0
  84. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/video/motion_detection.py +0 -0
  85. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/video/resize.py +0 -0
  86. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/video/video_album.py +0 -0
  87. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/video/video_frames.py +0 -0
  88. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/processors/video/video_resample.py +0 -0
  89. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/queues.py +0 -0
  90. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/server.py +0 -0
  91. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/web/__init__.py +0 -0
  92. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/pipelines/web/server.py +0 -0
  93. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/tools/__init__.py +0 -0
  94. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/tools/nvr.py +0 -0
  95. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/media/tools/video_interactive.py +0 -0
  96. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/__init__.py +0 -0
  97. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/base.py +0 -0
  98. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/dataset/__init__.py +0 -0
  99. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/dataset/object_detection.py +0 -0
  100. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/evaluation/__init__.py +0 -0
  101. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/evaluation/object_detection.py +0 -0
  102. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/images/__init__.py +0 -0
  103. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/images/base.py +0 -0
  104. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/images/description.py +0 -0
  105. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/images/embeddings.py +0 -0
  106. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/images/object_detection.py +0 -0
  107. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/images/tools.py +0 -0
  108. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/images/transformers.py +0 -0
  109. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/nlp/__init__.py +0 -0
  110. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/nlp/embeddings.py +0 -0
  111. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/nlp/tokenizers/__init__.py +0 -0
  112. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/nlp/tokenizers/bpe.py +0 -0
  113. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/video/__init__.py +0 -0
  114. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/ml/video/description.py +0 -0
  115. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/monitoring/__init__.py +0 -0
  116. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/monitoring/start.py +0 -0
  117. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/monitoring/watchdog/__init__.py +0 -0
  118. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/monitoring/watchdog/memory.py +0 -0
  119. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/network/__init__.py +0 -0
  120. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/network/camera.py +0 -0
  121. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/network/sniffer.py +0 -0
  122. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/network/tools.py +0 -0
  123. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/__init__.py +0 -0
  124. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/array.py +0 -0
  125. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/aws/__init__.py +0 -0
  126. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/aws/priority_queue.py +0 -0
  127. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/aws/s3.py +0 -0
  128. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/common.py +0 -0
  129. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/complexity.py +0 -0
  130. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/compress.py +0 -0
  131. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/configuration.py +0 -0
  132. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/crypto.py +0 -0
  133. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/daemon.py +0 -0
  134. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/formating.py +0 -0
  135. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/hardware.py +0 -0
  136. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/http.py +0 -0
  137. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/logger.py +0 -0
  138. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/os.py +0 -0
  139. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/queue.py +0 -0
  140. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/random.py +0 -0
  141. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/remote.py +0 -0
  142. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/simple_counter.py +0 -0
  143. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/synchronized.py +0 -0
  144. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo/tools/timers.py +0 -0
  145. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo.egg-info/dependency_links.txt +0 -0
  146. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo.egg-info/entry_points.txt +0 -0
  147. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo.egg-info/requires.txt +0 -0
  148. {sciveo-0.1.54 → sciveo-0.1.55}/sciveo.egg-info/top_level.txt +0 -0
  149. {sciveo-0.1.54 → sciveo-0.1.55}/setup.cfg +0 -0
  150. {sciveo-0.1.54 → sciveo-0.1.55}/setup.py +0 -0
  151. {sciveo-0.1.54 → sciveo-0.1.55}/test/test_complexity.py +0 -0
  152. {sciveo-0.1.54 → sciveo-0.1.55}/test/test_compress.py +0 -0
  153. {sciveo-0.1.54 → sciveo-0.1.55}/test/test_configuration.py +0 -0
  154. {sciveo-0.1.54 → sciveo-0.1.55}/test/test_crypto.py +0 -0
  155. {sciveo-0.1.54 → sciveo-0.1.55}/test/test_ml_datasets.py +0 -0
  156. {sciveo-0.1.54 → sciveo-0.1.55}/test/test_monitoring.py +0 -0
  157. {sciveo-0.1.54 → sciveo-0.1.55}/test/test_runner.py +0 -0
  158. {sciveo-0.1.54 → sciveo-0.1.55}/test/test_sampling.py +0 -0
  159. {sciveo-0.1.54 → sciveo-0.1.55}/test/test_tokenizers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sciveo
3
- Version: 0.1.54
3
+ Version: 0.1.55
4
4
  Description-Content-Type: text/markdown
5
5
  Provides-Extra: mon
6
6
  Provides-Extra: net
@@ -0,0 +1,227 @@
1
+ #
2
+ # Stanislav Georgiev, Softel Labs
3
+ #
4
+ # This is a proprietary file and may not be copied,
5
+ # distributed, or modified without express permission
6
+ # from the owner. For licensing inquiries, please
7
+ # contact s.georgiev@softel.bg.
8
+ #
9
+ # 2025
10
+ #
11
+
12
+ import re
13
+ import difflib
14
+ from collections import defaultdict
15
+
16
+
17
class EvalMarkdownSimple:
  """
  Simple line-based evaluation of LLM-generated Markdown against labeled Markdown.

  Each non-blank line is treated as a text block; blocks are matched with
  difflib similarity. Result buckets:
    EM: exact matches, PM: partial matches, FN: false negatives (missed),
    FP: false positives (extra), RI: reorder issues, H: hallucinations,
    FE: formatting errors.
  """

  def __init__(self, md_true: str, md_predicted: str, similarity_threshold=0.8):
    """
    :param md_true: The manually labeled (ground-truth) Markdown text.
    :param md_predicted: The LLM-generated Markdown text.
    :param similarity_threshold: Minimum similarity ratio to accept a match.
    """
    # Blank lines are dropped: two empty strings compare with ratio 1.0 and
    # would otherwise be counted as exact matches, inflating the scores.
    self.md_true = [line for line in md_true.split("\n") if line.strip()]
    self.md_predicted = [line for line in md_predicted.split("\n") if line.strip()]
    self.similarity_threshold = similarity_threshold
    self.results = {"EM": [], "PM": [], "FN": [], "FP": [], "RI": [], "H": [], "FE": []}

  def _find_best_match(self, text, candidates):
    """
    Finds the best matching text block from predicted Markdown using similarity comparison.

    :param text: The labeled Markdown text to match.
    :param candidates: The list of LLM-generated Markdown text blocks.
    :return: (best_match, similarity_score) or (None, 0) if no match found.
    """
    best_match = None
    best_score = 0
    text_lower = text.lower()

    for candidate in candidates:
      # Case-insensitive similarity; the original (unlowered) candidate is kept.
      score = difflib.SequenceMatcher(None, text_lower, candidate.lower()).ratio()
      if score > best_score:
        best_score = score
        best_match = candidate

    return (best_match, best_score) if best_score >= self.similarity_threshold else (None, 0)

  def _check_formatting_errors(self, original, predicted):
    """
    Checks for incorrect Markdown formatting in predicted text.

    :param original: The manually labeled Markdown text.
    :param predicted: The LLM-generated Markdown text.
    :return: True if formatting errors exist, False otherwise.
    """
    # If the texts become identical once Markdown markers (#, *, `) are
    # stripped from the edges, the only difference was the formatting itself.
    if original.strip("#*`").strip() == predicted.strip("#*`").strip():
      return True
    return False

  def evaluate(self):
    """
    Evaluates the predicted Markdown against the labeled Markdown.

    Fills and returns self.results (EM/PM/FN/FP/RI/H/FE buckets).
    """
    matched_predicted_blocks = set()
    predicted_idx_map = {block: idx for idx, block in enumerate(self.md_predicted)}

    for true_text in self.md_true:
      best_match, score = self._find_best_match(true_text, self.md_predicted)

      if best_match:
        matched_predicted_blocks.add(best_match)
        if score == 1.0:
          self.results["EM"].append((true_text, best_match))
        else:
          self.results["PM"].append((true_text, best_match, score))

          # Check for formatting errors
          if self._check_formatting_errors(true_text, best_match):
            self.results["FE"].append((true_text, best_match))
      else:
        self.results["FN"].append(true_text)

    # False Positives (extra predicted blocks that don't match labeled Markdown)
    for pred_text in self.md_predicted:
      if pred_text not in matched_predicted_blocks:
        self.results["FP"].append(pred_text)

    # Check for hallucinations (predicted content not in labeled text)
    for pred_text in self.results["FP"]:
      best_match, _ = self._find_best_match(pred_text, self.md_true)
      if best_match is None:
        self.results["H"].append(pred_text)

    # Check for order issues: matched predicted blocks, taken in ground-truth
    # order, should already appear sorted by their position in the prediction.
    pred_texts = [p for _, p in self.results["EM"]] + [p for _, p, _ in self.results["PM"]]

    matched_pred_indices = [predicted_idx_map[text] for text in pred_texts if text in predicted_idx_map]
    if matched_pred_indices != sorted(matched_pred_indices):
      self.results["RI"].append(matched_pred_indices)

    return self.results

  def score(self):
    """
    Computes an improved similarity score with weighted Partial Matches (PM).

    :return: dict with "Precision", "Recall" and "F1 Score" in [0, 1].
    """
    TP = len(self.results["EM"])
    # Partial matches contribute fractionally, weighted by their similarity.
    PM_weighted = sum(score for _, _, score in self.results["PM"])
    FN = len(self.results["FN"])
    FP = len(self.results["FP"])

    precision = (TP + PM_weighted) / (TP + PM_weighted + FP) if (TP + PM_weighted + FP) > 0 else 0
    recall = (TP + PM_weighted) / (TP + PM_weighted + FN) if (TP + PM_weighted + FN) > 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return {"Precision": precision, "Recall": recall, "F1 Score": f1_score}
+
116
+
117
class EvalMarkdown:
  """
  Section-wise evaluation of predicted Markdown against labeled (true) Markdown.

  Both documents are lower-cased and split into sections by headings; each
  section's text lines are compared for exact (EM) and partial (PM) matches,
  plus false negatives (FN, missed) and false positives (FP, extra).
  """

  def __init__(self, md_true, md_predicted, similarity_threshold=0.8):
    """
    :param md_true: The manually labeled (ground-truth) Markdown text.
    :param md_predicted: The predicted Markdown text.
    :param similarity_threshold: Ratio a partial match must exceed to count.
    """
    self.md_true = self._parse_markdown(md_true.lower())
    self.md_predicted = self._parse_markdown(md_predicted.lower())
    self.similarity_threshold = similarity_threshold
    self.results = defaultdict(lambda: {"EM": [], "PM": [], "FN": [], "FP": []})

  def _parse_markdown(self, markdown_text):
    """
    Parses Markdown into a dictionary of sections where key = heading, value = list of text blocks.
    """
    sections = defaultdict(list)
    current_section = "INTRO"  # Default section if no heading appears

    for line in markdown_text.split("\n"):
      heading_match = re.match(r"^(#{1,6})\s+(.+)", line)
      if heading_match:
        current_section = heading_match.group(2).strip()  # Extract section title
      else:
        if line.strip():
          sections[current_section].append(line.strip())

    return sections

  def _find_best_match(self, text, true_texts):
    """
    Finds the best match for a given text within a list of true texts.

    :return: (best_match_text, similarity_score) or (None, 0) when true_texts is empty.
    """
    if not true_texts:
      return None, 0

    best_match, best_score = None, 0

    # Uses the module-level difflib import instead of re-importing per call.
    for true_text in true_texts:
      score = difflib.SequenceMatcher(None, text, true_text).ratio()
      if score > best_score:
        best_match, best_score = true_text, score

    return best_match, best_score

  def evaluate(self):
    """
    Runs the section-wise evaluation, filling and returning self.results.
    """
    all_sections = set(self.md_true.keys()).union(set(self.md_predicted.keys()))
    for section in all_sections:
      true_texts = self.md_true.get(section, [])
      pred_texts = self.md_predicted.get(section, [])

      matched_true = set()
      matched_pred = set()

      # Exact matches
      for pred_text in pred_texts:
        if pred_text in true_texts:
          self.results[section]["EM"].append((pred_text, pred_text))
          matched_true.add(pred_text)
          matched_pred.add(pred_text)

      # Partial matches
      for pred_text in pred_texts:
        if pred_text not in matched_pred:
          best_match, score = self._find_best_match(pred_text, true_texts)
          if best_match and score > self.similarity_threshold:  # Accept only good matches
            self.results[section]["PM"].append((best_match, pred_text, score))
            matched_true.add(best_match)
            matched_pred.add(pred_text)

      # False negatives (missed text from ground truth)
      for true_text in true_texts:
        if true_text not in matched_true:
          self.results[section]["FN"].append(true_text)

      # False positives (extra predicted text)
      for pred_text in pred_texts:
        if pred_text not in matched_pred:
          self.results[section]["FP"].append(pred_text)

    return self.results

  def score(self):
    """
    Computes section-wise and global similarity scores.

    :return: {"Sections": {section: {Precision, Recall, F1 Score}}, "Global": {...}}
    """
    section_scores = {}
    global_TP, global_PM, global_FN, global_FP = 0, 0, 0, 0

    for section, result in self.results.items():
      TP = len(result["EM"])
      # Partial matches contribute fractionally, weighted by similarity.
      PM_weighted = sum(score for _, _, score in result["PM"])
      FN = len(result["FN"])
      FP = len(result["FP"])

      precision = (TP + PM_weighted) / (TP + PM_weighted + FP) if (TP + PM_weighted + FP) > 0 else 0
      recall = (TP + PM_weighted) / (TP + PM_weighted + FN) if (TP + PM_weighted + FN) > 0 else 0
      f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

      section_scores[section] = {"Precision": precision, "Recall": recall, "F1 Score": f1_score}

      global_TP += TP
      global_PM += PM_weighted
      global_FN += FN
      global_FP += FP

    # Global precision/recall across all sections
    global_precision = (global_TP + global_PM) / (global_TP + global_PM + global_FP) if (global_TP + global_PM + global_FP) > 0 else 0
    global_recall = (global_TP + global_PM) / (global_TP + global_PM + global_FN) if (global_TP + global_PM + global_FN) > 0 else 0
    global_f1 = (2 * global_precision * global_recall) / (global_precision + global_recall) if (global_precision + global_recall) > 0 else 0

    return {"Sections": section_scores, "Global": {"Precision": global_precision, "Recall": global_recall, "F1 Score": global_f1}}
@@ -169,7 +169,7 @@ class BaseMonitor(DaemonBase):
169
169
 
170
170
  lines = result.stdout.strip().split('\n')
171
171
  self.data["GPU"] = {
172
- "raw_lines": lines
172
+ "raw_lines": lines[:7] # TODO: FIX this! Due to aws timestream constraints.
173
173
  }
174
174
  except Exception as e:
175
175
  pass
@@ -0,0 +1,2 @@
1
+
2
+ __version__ = '0.1.55'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sciveo
3
- Version: 0.1.54
3
+ Version: 0.1.55
4
4
  Description-Content-Type: text/markdown
5
5
  Provides-Extra: mon
6
6
  Provides-Extra: net
@@ -97,13 +97,13 @@ sciveo/ml/base.py
97
97
  sciveo/ml/dataset/__init__.py
98
98
  sciveo/ml/dataset/object_detection.py
99
99
  sciveo/ml/evaluation/__init__.py
100
+ sciveo/ml/evaluation/markdown.py
100
101
  sciveo/ml/evaluation/object_detection.py
101
102
  sciveo/ml/images/__init__.py
102
103
  sciveo/ml/images/base.py
103
104
  sciveo/ml/images/description.py
104
105
  sciveo/ml/images/embeddings.py
105
106
  sciveo/ml/images/object_detection.py
106
- sciveo/ml/images/segmentation.py
107
107
  sciveo/ml/images/tools.py
108
108
  sciveo/ml/images/transformers.py
109
109
  sciveo/ml/nlp/__init__.py
@@ -147,6 +147,7 @@ test/test_complexity.py
147
147
  test/test_compress.py
148
148
  test/test_configuration.py
149
149
  test/test_crypto.py
150
+ test/test_eval_markdown.py
150
151
  test/test_ml_datasets.py
151
152
  test/test_monitoring.py
152
153
  test/test_runner.py
@@ -0,0 +1,41 @@
1
+ #
2
+ # Stanislav Georgiev, Softel Labs
3
+ #
4
+ # This is a proprietary file and may not be copied,
5
+ # distributed, or modified without express permission
6
+ # from the owner. For licensing inquiries, please
7
+ # contact s.georgiev@softel.bg.
8
+ #
9
+ # 2025
10
+ #
11
+
12
+ import math
13
+ import unittest
14
+
15
+ from sciveo.tools.logger import *
16
+ from sciveo.ml.evaluation.markdown import *
17
+
18
+
19
class TestEvalMarkdown(unittest.TestCase):
  def test_1(self):
    """EvalMarkdown should produce per-section results and a well-formed score."""
    md_true = """
    "# Breaking News"
    "A major fire broke out in the city center."
    "Authorities are investigating the cause."
    """

    md_predicted = """
    "# BREAKING NEWS",
    "A major fire broke out in city center.",
    "Authorities investigate the cause.",
    "Stay tuned for updates."
    """

    em = EvalMarkdown(md_true, md_predicted)
    results = em.evaluate()
    info(results)
    score = em.score()
    info("Score", score)

    # The original test only logged its outputs; assert the contract too:
    # evaluate() must report at least one section and score() must expose
    # section-wise and global Precision/Recall/F1 metrics bounded to [0, 1].
    self.assertTrue(len(results) > 0)
    self.assertIn("Sections", score)
    self.assertIn("Global", score)
    for metric in ("Precision", "Recall", "F1 Score"):
      self.assertIn(metric, score["Global"])
      self.assertGreaterEqual(score["Global"][metric], 0.0)
      self.assertLessEqual(score["Global"][metric], 1.0)


if __name__ == '__main__':
  unittest.main()
@@ -1,304 +0,0 @@
1
- #
2
- # Pavlin Georgiev, Softel Labs
3
- #
4
- # This is a proprietary file and may not be copied,
5
- # distributed, or modified without express permission
6
- # from the owner. For licensing inquiries, please
7
- # contact pavlin@softel.bg.
8
- #
9
- # 2024
10
- #
11
-
12
- import os
13
- import threading
14
- import cv2
15
- from PIL import Image
16
- import json
17
- import numpy as np
18
-
19
- import matplotlib
20
- import matplotlib.pyplot as plt
21
- import matplotlib.patches as patches
22
-
23
- import torch
24
- import transformers
25
- import torchvision
26
-
27
- from transformers import AutoImageProcessor, MaskFormerForInstanceSegmentation
28
-
29
-
30
class ImageTilesSplit:
  """
  Splits an image into fixed-size tiles and joins per-tile results back
  into a full-size array matching the original image.
  """

  def __init__(self, image, name):
    # image: numpy array, assumed BGR (OpenCV order) when multi-channel — TODO confirm with callers.
    # name: label used only in plot titles.
    self.image = image
    self.name = name

  def split(self, tile_size=(640, 640)):
    """
    Split the image into a grid of tiles of tile_size (edge tiles may be smaller).

    Builds and returns self.tiles_info: the original image, grid dimensions and a
    per-tile dict keyed 'tile_{i}_{j}' holding position, pixel box and tile data.
    """
    # NOTE(review): shape[2] access assumes a 3-dimensional array; a true
    # 2-D grayscale input would raise IndexError here — verify inputs.
    if self.image.shape[2] > 1:
      original_image = Image.fromarray(cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB))
    else:
      original_image = Image.fromarray(self.image)
    original_width, original_height = original_image.size

    # Ceiling division so partial edge tiles are included.
    num_tiles_x = (original_width + tile_size[0] - 1) // tile_size[0]
    num_tiles_y = (original_height + tile_size[1] - 1) // tile_size[1]

    self.tiles_info = {
      'image': self.image,
      'original_image': original_image,
      'original_size': (original_width, original_height),
      'tile_size': tile_size,
      'num_tiles_x': num_tiles_x,
      'num_tiles_y': num_tiles_y,
      'tiles': {}
    }

    for i in range(num_tiles_x):
      for j in range(num_tiles_y):
        # Pixel box of tile (i, j), clamped to the image bounds.
        left = i * tile_size[0]
        upper = j * tile_size[1]
        right = min(left + tile_size[0], original_width)
        lower = min(upper + tile_size[1], original_height)

        tile = original_image.crop((left, upper, right, lower))

        tile_key = f'tile_{i}_{j}'
        self.tiles_info['tiles'][tile_key] = {
          'position': (i, j),
          'box': (left, upper, right, lower),
          'tile': tile,
          # 'cv2.tile': cv2.cvtColor(np.array(tile), cv2.COLOR_RGB2BGR)
          'cv2.tile': np.array(tile)
        }

    return self.tiles_info

  def join(self, tile_join_key="predicted"):
    """
    Join per-tile single-channel results (stored under tile_join_key in each
    tile's dict) into one 2-D array of the original image size.

    :param tile_join_key: Key under which each tile stores its result array.
    :return: 2-D uint8 numpy array of shape (height, width).
    """
    joined = np.zeros((self.tiles_info['original_size'][1], self.tiles_info['original_size'][0], 1), dtype=np.uint8)
    for tile_key, tile_info in self.tiles_info['tiles'].items():
      box = tile_info['box']
      # Place the tile's result back at its original pixel box.
      joined[box[1]:box[3], box[0]:box[2], 0] = tile_info[tile_join_key]
    joined = np.squeeze(joined, axis=-1)
    # Cache the joined result alongside the tile data.
    self.tiles_info[tile_join_key] = joined
    return joined

  def get_original_coordinates(self, tile_key, x, y):
    """
    Converts coordinates from a tile back to the original image.

    Args:
        tile_key (str): The key of the tile in the tiles_info dictionary.
        x (int): The x-coordinate in the tile.
        y (int): The y-coordinate in the tile.

    Returns:
        tuple: The coordinates (x_original, y_original) in the original image.
    """
    tile_data = self.tiles_info['tiles'][tile_key]
    left, upper, _, _ = tile_data['box']

    x_original = left + x
    y_original = upper + y

    return (x_original, y_original)

  def plot_tiles_with_grid(self):
    """Plot the original image with the tile grid and (i, j) labels overlaid."""
    original_width, original_height = self.tiles_info['original_size']
    tile_width, tile_height = self.tiles_info['tile_size']
    num_tiles_x = self.tiles_info['num_tiles_x']
    num_tiles_y = self.tiles_info['num_tiles_y']

    fig, ax = plt.subplots(figsize=(10, 10))

    ax.imshow(self.tiles_info['original_image'])

    # Dashed red grid lines at tile boundaries.
    for i in range(num_tiles_x + 1):
      x = i * tile_width
      ax.axvline(x=x, color='r', linestyle='--', linewidth=1)

    for j in range(num_tiles_y + 1):
      y = j * tile_height
      ax.axhline(y=y, color='r', linestyle='--', linewidth=1)

    ax.set_xlim(0, original_width)
    ax.set_ylim(original_height, 0)

    # Label each tile with its (i, j) grid position at the tile center.
    for i in range(num_tiles_x):
      for j in range(num_tiles_y):
        x_center = i * tile_width + tile_width / 2
        y_center = j * tile_height + tile_height / 2
        ax.text(x_center, y_center, f'{i},{j}', color=(0,1,0), fontsize=7, ha='center', va='center')

    plt.title(f"{self.name} Grid")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.show()

  def plot_tiles_individually(self):
    """Plot every tile in a subplot grid matching the tile layout."""
    num_tiles_x = self.tiles_info['num_tiles_x']
    num_tiles_y = self.tiles_info['num_tiles_y']

    fig, axes = plt.subplots(num_tiles_y, num_tiles_x, figsize=(15, 15))

    # plt.subplots returns a scalar or 1-D array when a dimension is 1;
    # normalize to a 2-D list-of-lists so axes[j][i] indexing works below.
    if num_tiles_x == 1 and num_tiles_y == 1:
      axes = [[axes]]
    elif num_tiles_x == 1:
      axes = [[ax] for ax in axes]
    elif num_tiles_y == 1:
      axes = [axes]

    for i in range(num_tiles_x):
      for j in range(num_tiles_y):
        tile_key = f'tile_{i}_{j}'
        tile = self.tiles_info['tiles'][tile_key]['tile']

        ax = axes[j][i]

        ax.imshow(tile, cmap='gray')
        ax.set_title(f'{i}.{j}', fontsize=8)
        ax.axis('off')

    plt.tight_layout()
    plt.show()
-
163
-
164
class MaskInstancePredictor:
  """
  Semantic segmentation using the HuggingFace MaskFormer (swin-base, ADE20k)
  model, with helpers to relabel and visualize the predicted masks.
  """

  def __init__(self, cache_dir, device="cuda", colors=None):
    # Default palette used by plot_mask, indexed by label id.
    if colors is None:
      self.colors = [
        [0, 0, 255],
        [0, 255, 0],
        [255, 0, 0],
        [255, 255, 255]
      ]
    else:
      self.colors = colors

    self.device = device
    self.cache_dir = cache_dir

    # NOTE(review): the processor is loaded without cache_dir while the model
    # uses it — presumably intentional, but worth confirming.
    self.processor = AutoImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade")
    self.model = MaskFormerForInstanceSegmentation.from_pretrained(
      "facebook/maskformer-swin-base-ade",
      cache_dir=cache_dir
    ).to(self.device)

  def relabel_predictions(self, predictions, label_map, new_labels):
    """
    Map raw model label ids to a new label space.

    :param predictions: numpy array of predicted label ids.
    :param label_map: dict {model_label_id: label_name}.
    :param new_labels: dict {label_name: new_label_id}.
    :return: array like predictions with new ids; unmapped cells are -1.
    """
    relabeled = np.full_like(predictions, fill_value=-1)

    for label_id, label_name in label_map.items():
      if label_name in new_labels:
        relabeled[predictions == label_id] = new_labels[label_name]

    return relabeled

  def predict_one(self, image):
    """
    Run semantic segmentation on a single PIL image.

    :param image: PIL image (image.size is used for the output target size).
    :return: per-pixel label tensor on CPU, sized to the input image.
    """
    inputs = self.processor(images=image, return_tensors="pt").to(self.device)
    with torch.no_grad():
      outputs = self.model(**inputs)

    # image.size is (width, height); post-processing expects (height, width).
    predicted = self.processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
    return predicted.to("cpu")

  def plot_mask(self, image, mask, alpha=0.5):
    """
    Overlay a label mask on the image and display it.

    :param image: image to show under the mask.
    :param mask: 2-D array of label ids indexing self.colors.
    :param alpha: blend weight of the color mask over the image.
    """
    color_mask = np.zeros((mask.shape[0], mask.shape[1], 3), dtype=np.uint8) # height, width, 3
    for label, color in enumerate(self.colors):
      color_mask[mask == label, :] = color
    color_mask = color_mask[..., ::-1] # Convert to BGR

    # Alpha-blend the color mask onto the image.
    masked_image = np.array(image) * (1 - alpha) + color_mask * alpha
    masked_image = masked_image.astype(np.uint8)

    plt.figure(figsize=(15, 10))
    plt.imshow(masked_image)
    plt.show()
214
-
215
-
216
- class UrbanPredictor:
217
- def __init__(self, name, cache_dir, device="cuda"):
218
- self.name = name
219
- self.device = device
220
- self.cache_dir = cache_dir
221
-
222
- self.predictor = MaskInstancePredictor(cache_dir, device=device)
223
- self._classes = {"artificial": 0, "natural": 1}
224
- self.ade_classes = {
225
- 0: 'artificial', 1: 'artificial', 2: 'natural', 3: 'artificial', 4: 'natural', 5: 'artificial',6: 'artificial',7: 'artificial',8: 'artificial',9: 'natural',
226
- 10: 'artificial',11: 'artificial',12: 'artificial',13: 'natural',14: 'artificial',15: 'artificial',16: 'natural',17: 'natural',18: 'artificial',19: 'artificial',
227
- 20: 'artificial',21: 'natural',22: 'artificial',23: 'artificial',24: 'artificial',25: 'artificial',26: 'natural',27: 'artificial',28: 'artificial',29: 'natural',
228
- 30: 'artificial',31: 'artificial',32: 'artificial',33: 'artificial',34: 'natural',35: 'artificial',36: 'artificial',37: 'artificial',38: 'artificial',39: 'artificial',
229
- 40: 'artificial',41: 'artificial',42: 'artificial',43: 'artificial',44: 'artificial',45: 'artificial',46: 'natural',47: 'artificial',48: 'artificial',49: 'artificial',
230
- 50: 'artificial',51: 'artificial',52: 'artificial',53: 'artificial',54: 'artificial',55: 'artificial',56: 'artificial',57: 'artificial',58: 'artificial',59: 'artificial',
231
- 60: 'natural',61: 'artificial',62: 'artificial', 63: 'artificial', 64: 'artificial',65: 'artificial',66: 'natural',67: 'artificial',68: 'natural',69: 'artificial',
232
- 70: 'artificial',71: 'artificial',72: 'natural',73: 'artificial',74: 'artificial',75: 'artificial',76: 'artificial',77: 'artificial',78: 'artificial',79: 'artificial',
233
- 80: 'artificial',81: 'artificial',82: 'artificial',83: 'artificial',84: 'artificial',85: 'artificial',86: 'artificial',87: 'artificial',88: 'artificial',89: 'artificial',
234
- 90: 'artificial',91: 'artificial',92: 'artificial',93: 'artificial',94: 'natural',95: 'artificial',96: 'artificial',97: 'artificial',98: 'artificial',99: 'artificial',
235
- 100: 'artificial',101: 'artificial',102: 'artificial',103: 'artificial',104: 'artificial',105: 'artificial',106: 'artificial',107: 'artificial',108: 'artificial',109: 'artificial',
236
- 110: 'artificial',111: 'artificial',112: 'artificial',113: 'natural',114: 'artificial',115: 'artificial',116: 'artificial',117: 'artificial',118: 'artificial',119: 'artificial',
237
- 120: 'artificial',121: 'artificial',122: 'artificial',123: 'artificial',124: 'artificial',125: 'artificial',126: 'natural',127: 'artificial',128: 'natural',129: 'artificial',
238
- 130: 'artificial',131: 'artificial',132: 'artificial',133: 'artificial',134: 'artificial',135: 'artificial',136: 'artificial',137: 'artificial',138: 'artificial',139: 'artificial',
239
- 140: 'artificial',141: 'artificial',142: 'artificial',143: 'artificial',144: 'artificial',145: 'artificial',146: 'artificial',147: 'artificial',148: 'artificial',149: 'artificial'
240
- }
241
- # natural_labels = {
242
- # 'sky', 'tree', 'grass', 'mountain, mount', 'plant', 'water', 'earth, ground',
243
- # 'rock, stone', 'sand', 'flower', 'hill', 'palm, palm tree', 'river', 'sea',
244
- # 'field', 'land, ground, soil', 'falls', 'lake', 'animal'
245
- # }
246
- # self.ade_classes = {
247
- # key: "natural" if value in natural_labels else "artificial"
248
- # for key, value in self.predictor.model.config.id2label.items()
249
- # }
250
-
251
- def predict(self, image, w=512):
252
- self.tile_split = ImageTilesSplit(image, name=self.name)
253
- self.tile_split.split(tile_size=(w, w))
254
-
255
- n = 1
256
- l = len(self.tile_split.tiles_info['tiles'])
257
- for tile_key, tile_info in self.tile_split.tiles_info['tiles'].items():
258
- mask = self.predictor.predict_one(tile_info['tile'])
259
- mask = self.predictor.relabel_predictions(mask, self.ade_classes, self._classes)
260
- self.tile_split.tiles_info['tiles'][tile_key]['predicted'] = mask
261
- if n % 100 == 0:
262
- info(f"predict {self.name}", f"{n}/{l}", f"on {self.device}")
263
- n += 1
264
-
265
- self.tile_split.join(tile_join_key="predicted")
266
-
267
- def plot(self, alpha=0.5):
268
- self.predictor.plot_mask(self.tile_split.tiles_info['image'], self.tile_split.tiles_info['predicted'], alpha=alpha)
269
-
270
- def plot_tile(self, tile_key, alpha=0.5):
271
- self.predictor.plot_mask(self.tile_split.tiles_info['tiles'][tile_key]['tile'], self.tile_split.tiles_info['tiles'][tile_key]['predicted'], alpha=alpha)
272
-
273
-
274
- class ThrPredictor:
275
- plot_lock = threading.Lock()
276
-
277
- def __init__(self, name, base_path, w, cache_dir, device):
278
- self.image = cv2.imread(os.path.join(base_path, name))
279
- self.name = name
280
- self.w = w
281
- self.cache_dir = cache_dir
282
- self.device = device
283
-
284
- def start(self):
285
- self.t = threading.Thread(target = self.run)
286
- self.t.start()
287
-
288
- def join(self):
289
- self.t.join()
290
-
291
- def plot(self, alpha=0.3):
292
- info("Plot", self.name)
293
- self.predictor.tile_split.plot_tiles_with_grid()
294
- self.predictor.plot(alpha=alpha)
295
- # self.predictor.plot_tile("tile_11_11", alpha=alpha)
296
-
297
- def run(self):
298
- try:
299
- self.predictor = UrbanPredictor(name=self.name, cache_dir=self.cache_dir, device=self.device)
300
- self.predictor.predict(self.image, w=self.w)
301
- with ThrPredictor.plot_lock:
302
- self.plot(alpha=0.3)
303
- except Exception as e:
304
- error(e)
@@ -1,2 +0,0 @@
1
-
2
- __version__ = '0.1.54'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes