nv-ingest-api 26.1.0rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (177) hide show
  1. nv_ingest_api/__init__.py +3 -0
  2. nv_ingest_api/interface/__init__.py +218 -0
  3. nv_ingest_api/interface/extract.py +977 -0
  4. nv_ingest_api/interface/mutate.py +154 -0
  5. nv_ingest_api/interface/store.py +200 -0
  6. nv_ingest_api/interface/transform.py +382 -0
  7. nv_ingest_api/interface/utility.py +186 -0
  8. nv_ingest_api/internal/__init__.py +0 -0
  9. nv_ingest_api/internal/enums/__init__.py +3 -0
  10. nv_ingest_api/internal/enums/common.py +550 -0
  11. nv_ingest_api/internal/extract/__init__.py +3 -0
  12. nv_ingest_api/internal/extract/audio/__init__.py +3 -0
  13. nv_ingest_api/internal/extract/audio/audio_extraction.py +202 -0
  14. nv_ingest_api/internal/extract/docx/__init__.py +5 -0
  15. nv_ingest_api/internal/extract/docx/docx_extractor.py +232 -0
  16. nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  17. nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +3 -0
  18. nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +127 -0
  19. nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +971 -0
  20. nv_ingest_api/internal/extract/html/__init__.py +3 -0
  21. nv_ingest_api/internal/extract/html/html_extractor.py +84 -0
  22. nv_ingest_api/internal/extract/image/__init__.py +3 -0
  23. nv_ingest_api/internal/extract/image/chart_extractor.py +375 -0
  24. nv_ingest_api/internal/extract/image/image_extractor.py +208 -0
  25. nv_ingest_api/internal/extract/image/image_helpers/__init__.py +3 -0
  26. nv_ingest_api/internal/extract/image/image_helpers/common.py +433 -0
  27. nv_ingest_api/internal/extract/image/infographic_extractor.py +290 -0
  28. nv_ingest_api/internal/extract/image/ocr_extractor.py +407 -0
  29. nv_ingest_api/internal/extract/image/table_extractor.py +391 -0
  30. nv_ingest_api/internal/extract/pdf/__init__.py +3 -0
  31. nv_ingest_api/internal/extract/pdf/engines/__init__.py +19 -0
  32. nv_ingest_api/internal/extract/pdf/engines/adobe.py +484 -0
  33. nv_ingest_api/internal/extract/pdf/engines/llama.py +246 -0
  34. nv_ingest_api/internal/extract/pdf/engines/nemotron_parse.py +598 -0
  35. nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +166 -0
  36. nv_ingest_api/internal/extract/pdf/engines/pdfium.py +652 -0
  37. nv_ingest_api/internal/extract/pdf/engines/tika.py +96 -0
  38. nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +426 -0
  39. nv_ingest_api/internal/extract/pdf/pdf_extractor.py +74 -0
  40. nv_ingest_api/internal/extract/pptx/__init__.py +5 -0
  41. nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  42. nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +968 -0
  43. nv_ingest_api/internal/extract/pptx/pptx_extractor.py +210 -0
  44. nv_ingest_api/internal/meta/__init__.py +3 -0
  45. nv_ingest_api/internal/meta/udf.py +232 -0
  46. nv_ingest_api/internal/mutate/__init__.py +3 -0
  47. nv_ingest_api/internal/mutate/deduplicate.py +110 -0
  48. nv_ingest_api/internal/mutate/filter.py +133 -0
  49. nv_ingest_api/internal/primitives/__init__.py +0 -0
  50. nv_ingest_api/internal/primitives/control_message_task.py +16 -0
  51. nv_ingest_api/internal/primitives/ingest_control_message.py +307 -0
  52. nv_ingest_api/internal/primitives/nim/__init__.py +9 -0
  53. nv_ingest_api/internal/primitives/nim/default_values.py +14 -0
  54. nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +3 -0
  55. nv_ingest_api/internal/primitives/nim/model_interface/cached.py +274 -0
  56. nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +56 -0
  57. nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +270 -0
  58. nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +338 -0
  59. nv_ingest_api/internal/primitives/nim/model_interface/nemotron_parse.py +239 -0
  60. nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +776 -0
  61. nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +367 -0
  62. nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +129 -0
  63. nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +177 -0
  64. nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +1681 -0
  65. nv_ingest_api/internal/primitives/nim/nim_client.py +801 -0
  66. nv_ingest_api/internal/primitives/nim/nim_model_interface.py +126 -0
  67. nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  68. nv_ingest_api/internal/primitives/tracing/latency.py +69 -0
  69. nv_ingest_api/internal/primitives/tracing/logging.py +96 -0
  70. nv_ingest_api/internal/primitives/tracing/tagging.py +288 -0
  71. nv_ingest_api/internal/schemas/__init__.py +3 -0
  72. nv_ingest_api/internal/schemas/extract/__init__.py +3 -0
  73. nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +133 -0
  74. nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +144 -0
  75. nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +129 -0
  76. nv_ingest_api/internal/schemas/extract/extract_html_schema.py +34 -0
  77. nv_ingest_api/internal/schemas/extract/extract_image_schema.py +126 -0
  78. nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +137 -0
  79. nv_ingest_api/internal/schemas/extract/extract_ocr_schema.py +137 -0
  80. nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +220 -0
  81. nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +128 -0
  82. nv_ingest_api/internal/schemas/extract/extract_table_schema.py +137 -0
  83. nv_ingest_api/internal/schemas/message_brokers/__init__.py +3 -0
  84. nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +37 -0
  85. nv_ingest_api/internal/schemas/message_brokers/request_schema.py +34 -0
  86. nv_ingest_api/internal/schemas/message_brokers/response_schema.py +19 -0
  87. nv_ingest_api/internal/schemas/meta/__init__.py +3 -0
  88. nv_ingest_api/internal/schemas/meta/base_model_noext.py +11 -0
  89. nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +355 -0
  90. nv_ingest_api/internal/schemas/meta/metadata_schema.py +394 -0
  91. nv_ingest_api/internal/schemas/meta/udf.py +23 -0
  92. nv_ingest_api/internal/schemas/mixins.py +39 -0
  93. nv_ingest_api/internal/schemas/mutate/__init__.py +3 -0
  94. nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +16 -0
  95. nv_ingest_api/internal/schemas/store/__init__.py +3 -0
  96. nv_ingest_api/internal/schemas/store/store_embedding_schema.py +28 -0
  97. nv_ingest_api/internal/schemas/store/store_image_schema.py +45 -0
  98. nv_ingest_api/internal/schemas/transform/__init__.py +3 -0
  99. nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +36 -0
  100. nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +17 -0
  101. nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +48 -0
  102. nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +24 -0
  103. nv_ingest_api/internal/store/__init__.py +3 -0
  104. nv_ingest_api/internal/store/embed_text_upload.py +236 -0
  105. nv_ingest_api/internal/store/image_upload.py +251 -0
  106. nv_ingest_api/internal/transform/__init__.py +3 -0
  107. nv_ingest_api/internal/transform/caption_image.py +219 -0
  108. nv_ingest_api/internal/transform/embed_text.py +702 -0
  109. nv_ingest_api/internal/transform/split_text.py +182 -0
  110. nv_ingest_api/util/__init__.py +3 -0
  111. nv_ingest_api/util/control_message/__init__.py +0 -0
  112. nv_ingest_api/util/control_message/validators.py +47 -0
  113. nv_ingest_api/util/converters/__init__.py +0 -0
  114. nv_ingest_api/util/converters/bytetools.py +78 -0
  115. nv_ingest_api/util/converters/containers.py +65 -0
  116. nv_ingest_api/util/converters/datetools.py +90 -0
  117. nv_ingest_api/util/converters/dftools.py +127 -0
  118. nv_ingest_api/util/converters/formats.py +64 -0
  119. nv_ingest_api/util/converters/type_mappings.py +27 -0
  120. nv_ingest_api/util/dataloader/__init__.py +9 -0
  121. nv_ingest_api/util/dataloader/dataloader.py +409 -0
  122. nv_ingest_api/util/detectors/__init__.py +5 -0
  123. nv_ingest_api/util/detectors/language.py +38 -0
  124. nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  125. nv_ingest_api/util/exception_handlers/converters.py +72 -0
  126. nv_ingest_api/util/exception_handlers/decorators.py +429 -0
  127. nv_ingest_api/util/exception_handlers/detectors.py +74 -0
  128. nv_ingest_api/util/exception_handlers/pdf.py +116 -0
  129. nv_ingest_api/util/exception_handlers/schemas.py +68 -0
  130. nv_ingest_api/util/image_processing/__init__.py +5 -0
  131. nv_ingest_api/util/image_processing/clustering.py +260 -0
  132. nv_ingest_api/util/image_processing/processing.py +177 -0
  133. nv_ingest_api/util/image_processing/table_and_chart.py +504 -0
  134. nv_ingest_api/util/image_processing/transforms.py +850 -0
  135. nv_ingest_api/util/imports/__init__.py +3 -0
  136. nv_ingest_api/util/imports/callable_signatures.py +108 -0
  137. nv_ingest_api/util/imports/dynamic_resolvers.py +158 -0
  138. nv_ingest_api/util/introspection/__init__.py +3 -0
  139. nv_ingest_api/util/introspection/class_inspect.py +145 -0
  140. nv_ingest_api/util/introspection/function_inspect.py +65 -0
  141. nv_ingest_api/util/logging/__init__.py +0 -0
  142. nv_ingest_api/util/logging/configuration.py +102 -0
  143. nv_ingest_api/util/logging/sanitize.py +84 -0
  144. nv_ingest_api/util/message_brokers/__init__.py +3 -0
  145. nv_ingest_api/util/message_brokers/qos_scheduler.py +283 -0
  146. nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +9 -0
  147. nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +465 -0
  148. nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +71 -0
  149. nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +455 -0
  150. nv_ingest_api/util/metadata/__init__.py +5 -0
  151. nv_ingest_api/util/metadata/aggregators.py +516 -0
  152. nv_ingest_api/util/multi_processing/__init__.py +8 -0
  153. nv_ingest_api/util/multi_processing/mp_pool_singleton.py +200 -0
  154. nv_ingest_api/util/nim/__init__.py +161 -0
  155. nv_ingest_api/util/pdf/__init__.py +3 -0
  156. nv_ingest_api/util/pdf/pdfium.py +428 -0
  157. nv_ingest_api/util/schema/__init__.py +3 -0
  158. nv_ingest_api/util/schema/schema_validator.py +10 -0
  159. nv_ingest_api/util/service_clients/__init__.py +3 -0
  160. nv_ingest_api/util/service_clients/client_base.py +86 -0
  161. nv_ingest_api/util/service_clients/kafka/__init__.py +3 -0
  162. nv_ingest_api/util/service_clients/redis/__init__.py +3 -0
  163. nv_ingest_api/util/service_clients/redis/redis_client.py +983 -0
  164. nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  165. nv_ingest_api/util/service_clients/rest/rest_client.py +595 -0
  166. nv_ingest_api/util/string_processing/__init__.py +51 -0
  167. nv_ingest_api/util/string_processing/configuration.py +682 -0
  168. nv_ingest_api/util/string_processing/yaml.py +109 -0
  169. nv_ingest_api/util/system/__init__.py +0 -0
  170. nv_ingest_api/util/system/hardware_info.py +594 -0
  171. nv_ingest_api-26.1.0rc4.dist-info/METADATA +237 -0
  172. nv_ingest_api-26.1.0rc4.dist-info/RECORD +177 -0
  173. nv_ingest_api-26.1.0rc4.dist-info/WHEEL +5 -0
  174. nv_ingest_api-26.1.0rc4.dist-info/licenses/LICENSE +201 -0
  175. nv_ingest_api-26.1.0rc4.dist-info/top_level.txt +2 -0
  176. udfs/__init__.py +5 -0
  177. udfs/llm_summarizer_udf.py +259 -0
@@ -0,0 +1,594 @@
1
+ import logging
2
+ import os
3
+ import platform
4
+ from typing import Optional, Dict, Any, Tuple
5
+
6
+ # Try importing psutil, but don't make it a hard requirement if only cgroups are needed
7
+ try:
8
+ import psutil
9
+ except ImportError:
10
+ psutil = None
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # --- Cgroup Constants ---
15
+ CGROUP_V1_CPU_DIR = "/sys/fs/cgroup/cpu"
16
+ CGROUP_V1_CPUACCT_DIR = "/sys/fs/cgroup/cpuacct" # Sometimes usage is here
17
+ CGROUP_V2_CPU_FILE = "/sys/fs/cgroup/cpu.max" # Standard path in v2 unified hierarchy
18
+
19
+ # Memory cgroup paths
20
+ CGROUP_V1_MEMORY_DIR = "/sys/fs/cgroup/memory"
21
+ CGROUP_V2_MEMORY_FILE = "/sys/fs/cgroup/memory.max" # v2 unified hierarchy
22
+ CGROUP_V2_MEMORY_CURRENT = "/sys/fs/cgroup/memory.current" # Current usage in v2
23
+
24
+
25
+ class SystemResourceProbe:
26
+ """
27
+ Detects the effective CPU core count available to the current process,
28
+ optionally applying a weighting factor for hyperthreads (SMT).
29
+
30
+ It attempts to reconcile information from:
31
+ 1. Linux Cgroup v2 CPU limits (cpu.max)
32
+ 2. Linux Cgroup v1 CPU limits (cpu.cfs_quota_us, cpu.cfs_period_us)
33
+ 3. OS scheduler affinity (os.sched_getaffinity)
34
+ 4. OS reported CPU counts (psutil.cpu_count for logical/physical)
35
+
36
+ Prioritizes Cgroup quota limits. If the limit is based on core count
37
+ (affinity/OS), it applies hyperthreading weight if psutil provides
38
+ physical/logical counts.
39
+ """
40
+
41
def __init__(self, hyperthread_weight: float = 0.75):
    """
    Validate the weighting factor, reset all probe state, then run detection.

    Parameters
    ----------
    hyperthread_weight : float, optional
        Performance weighting factor for hyperthreads, from 0.0 to 1.0.
        1.0 counts a hyperthread the same as a physical core; 0.5 credits
        it with 50% extra performance. The value is only honoured when
        psutil could be imported; otherwise the stored weight is forced
        to 1.0. Defaults to 0.75, a heuristic noted by the original author
        as the point where parallel pdf decomposition efficiency was
        observed to begin rolling off.

    Raises
    ------
    ValueError
        If ``hyperthread_weight`` is not within 0.0 to 1.0.
    """
    # Written as a chained comparison so that NaN is rejected as well.
    if not (0.0 <= hyperthread_weight <= 1.0):
        raise ValueError("hyperthread_weight must be between 0.0 and 1.0")

    if psutil:
        self.hyperthread_weight: float = hyperthread_weight
    else:
        # Physical/logical counts come from psutil; without it no weighting
        # can be computed, so the weight is neutralised.
        self.hyperthread_weight = 1.0
        if hyperthread_weight != 1.0:
            logger.warning("psutil not found. Hyperthreading weight ignored (effectively 1.0).")

    # --- Result fields (filled in by _detect) ---
    # Raw limit before any hyperthread weighting, and how it was found
    self.raw_limit_value: Optional[float] = None
    self.raw_limit_method: str = "unknown"
    # Final, possibly weighted, result and the method that produced it
    self.effective_cores: Optional[float] = None
    self.detection_method: str = "unknown"

    # --- OS-reported CPU information ---
    self.os_logical_cores: Optional[int] = None
    self.os_physical_cores: Optional[int] = None
    self.os_sched_affinity_cores: Optional[int] = None

    # --- Cgroup CPU information ---
    self.cgroup_type: Optional[str] = None
    self.cgroup_quota_cores: Optional[float] = None
    self.cgroup_period_us: Optional[int] = None
    self.cgroup_shares: Optional[int] = None
    self.cgroup_usage_percpu_us: Optional[list[int]] = None
    self.cgroup_usage_total_us: Optional[int] = None

    # --- Memory information ---
    self.os_total_memory_bytes: Optional[int] = None
    self.cgroup_memory_limit_bytes: Optional[int] = None
    self.cgroup_memory_usage_bytes: Optional[int] = None
    self.effective_memory_bytes: Optional[int] = None
    self.memory_detection_method: str = "unknown"

    # Detection runs eagerly, so a constructed probe is already populated.
    self._detect()
94
+
95
+ @staticmethod
96
+ def _read_file_int(path: str) -> Optional[int]:
97
+ """Safely reads an integer from a file."""
98
+ try:
99
+ if os.path.exists(path):
100
+ with open(path, "r") as f:
101
+ content = f.read().strip()
102
+ if content:
103
+ return int(content)
104
+ except (IOError, ValueError, PermissionError) as e:
105
+ logger.debug(f"Failed to read or parse int from {path}: {e}")
106
+ return None
107
+
108
+ @staticmethod
109
+ def _read_file_str(path: str) -> Optional[str]:
110
+ """Safely reads a string from a file."""
111
+ try:
112
+ if os.path.exists(path):
113
+ with open(path, "r") as f:
114
+ return f.read().strip()
115
+ except (IOError, PermissionError) as e:
116
+ logger.debug(f"Failed to read string from {path}: {e}")
117
+ return None
118
+
119
def _read_cgroup_v1(self) -> bool:
    """
    Probe the Cgroup v1 CPU controller for quota, period, shares and usage.

    Populates ``cgroup_type``, ``cgroup_period_us``, ``cgroup_shares``,
    ``cgroup_quota_cores`` and the ``cgroup_usage_*`` fields as far as the
    corresponding files can be read.

    Returns
    -------
    bool
        True when a positive quota/period pair, an unlimited quota (-1),
        or at least a shares value was found; False otherwise (including
        when quota/period were readable but held invalid values).
    """
    if not os.path.exists(CGROUP_V1_CPU_DIR):
        logger.debug(f"Cgroup v1 CPU dir not found: {CGROUP_V1_CPU_DIR}")
        return False

    logger.debug(f"Checking Cgroup v1 limits in {CGROUP_V1_CPU_DIR}")
    quota_us, period_us, shares = (
        self._read_file_int(os.path.join(CGROUP_V1_CPU_DIR, filename))
        for filename in ("cpu.cfs_quota_us", "cpu.cfs_period_us", "cpu.shares")
    )

    # Usage statistics live under the cpuacct controller when it is present.
    if os.path.exists(CGROUP_V1_CPUACCT_DIR):
        total_usage = self._read_file_int(os.path.join(CGROUP_V1_CPUACCT_DIR, "cpuacct.usage"))
        percpu_raw = self._read_file_str(os.path.join(CGROUP_V1_CPUACCT_DIR, "cpuacct.usage_percpu"))
        if percpu_raw:
            try:
                self.cgroup_usage_percpu_us = list(map(int, percpu_raw.split()))
            except ValueError:
                logger.warning("Could not parse cpuacct.usage_percpu")
        if total_usage is not None:
            self.cgroup_usage_total_us = total_usage

    if quota_us is None or period_us is None:
        if shares is None:
            return False
        # Only shares are readable: still enough to identify the hierarchy as v1.
        self.cgroup_type = "v1"
        self.cgroup_shares = shares
        logger.debug(f"Cgroup v1 shares detected: {shares} (no quota found)")
        return True

    self.cgroup_type = "v1"
    self.cgroup_period_us = period_us
    self.cgroup_shares = shares  # Stays None when cpu.shares was unreadable

    if quota_us > 0 and period_us > 0:
        self.cgroup_quota_cores = quota_us / period_us
        logger.debug(
            f"Cgroup v1 quota detected: {quota_us} us / {period_us} us = {self.cgroup_quota_cores:.2f}"
            f" effective cores"
        )
        return True

    if quota_us == -1:
        # No quota is enforced, but the cgroup type has been identified.
        logger.debug("Cgroup v1 quota detected: Unlimited (-1)")
        return True

    logger.warning(f"Cgroup v1 quota/period values invalid? Quota: {quota_us}, Period: {period_us}")
    return False
168
+
169
def _read_cgroup_v2(self) -> bool:
    """
    Probe the Cgroup v2 ``cpu.max`` file for a CPU quota.

    The file is expected to hold two whitespace-separated fields, quota
    and period, where the quota may be the literal ``max``. Sets
    ``cgroup_type`` to "v2" as soon as the file yields any content, and
    fills ``cgroup_period_us`` / ``cgroup_quota_cores`` when they parse.

    Returns
    -------
    bool
        True when the file held either an unlimited quota or a positive
        quota/period pair; False when the file is missing, empty,
        malformed, or holds non-positive values.
    """
    if not os.path.exists(CGROUP_V2_CPU_FILE):
        logger.debug(f"Cgroup v2 cpu.max file not found: {CGROUP_V2_CPU_FILE}")
        return False

    logger.debug(f"Checking Cgroup v2 limits in {CGROUP_V2_CPU_FILE}")
    content = self._read_file_str(CGROUP_V2_CPU_FILE)
    if not content:
        return False

    self.cgroup_type = "v2"
    fields = content.split()
    if len(fields) != 2:
        logger.warning(f"Unexpected format in Cgroup v2 cpu.max: '{content}'")
        return False

    quota_field, period_field = fields
    is_unlimited = quota_field == "max"
    try:
        period_us = int(period_field)
        # The period is recorded before the quota is parsed, so it is kept
        # even when the quota field turns out to be unparseable.
        self.cgroup_period_us = period_us
        quota_us = None if is_unlimited else int(quota_field)
    except ValueError:
        logger.warning(f"Could not parse Cgroup v2 cpu.max content: '{content}'")
        return False

    if is_unlimited:
        logger.debug("Cgroup v2 quota detected: Unlimited ('max')")
        return True  # Type identified, no quota limit

    if quota_us > 0 and period_us > 0:
        self.cgroup_quota_cores = quota_us / period_us
        logger.debug(
            f"Cgroup v2 quota detected: {quota_us} us / {period_us}"
            f" us = {self.cgroup_quota_cores:.2f} effective cores"
        )
        return True

    logger.warning(f"Cgroup v2 quota/period values invalid? Quota: {quota_us}, Period: {period_us}")
    return False
207
+
208
@staticmethod
def _get_os_affinity() -> Optional[int]:
    """
    Count the CPUs in the current process's scheduler affinity set.

    Returns
    -------
    Optional[int]
        The size of the set returned by ``os.sched_getaffinity(0)``, or
        None when not running on Linux, when the call is unavailable or
        fails, or when the set is empty.
    """
    if platform.system() != "Linux":
        logger.debug("os.sched_getaffinity is Linux-specific.")
        return None

    try:
        cpu_set = os.sched_getaffinity(0)  # 0 selects the current process
    except AttributeError:
        logger.debug("os.sched_getaffinity not available on this platform/Python version.")
        return None
    except OSError as e:
        logger.warning(f"Could not get affinity: {e}")
        return None

    count = len(cpu_set)
    if count <= 0:
        logger.warning("os.sched_getaffinity(0) returned 0 or empty set.")
        return None

    logger.debug(f"Detected {count} cores via os.sched_getaffinity.")
    return count
230
+
231
@staticmethod
def _get_os_cpu_counts() -> Tuple[Optional[int], Optional[int]]:
    """
    Obtain logical and physical CPU counts from psutil, else ``os.cpu_count``.

    Returns
    -------
    Tuple[Optional[int], Optional[int]]
        ``(logical, physical)``. The physical count is only ever supplied
        by psutil; either element is None when it could not be determined.
    """
    logical = physical = None
    source = "unknown"

    if psutil:
        try:
            # ``or None`` normalises a 0/None answer from psutil to None.
            logical = psutil.cpu_count(logical=True) or None
            physical = psutil.cpu_count(logical=False) or None
            source = "psutil"
        except Exception as e:
            logger.warning(f"psutil.cpu_count failed: {e}. Falling back to os.cpu_count.")
            logical = physical = None  # Discard partial results before the fallback

    if logical is None:
        # psutil was missing, failed, or gave no logical count.
        try:
            logical = os.cpu_count()
            source = "os.cpu_count"
            # os.cpu_count reports no physical count, so that stays as it was.
        except NotImplementedError:
            logger.error("os.cpu_count() is not implemented on this system.")
        except Exception as e:
            logger.error(f"os.cpu_count() failed: {e}")

    if logical:
        logger.debug(f"Detected {logical} logical cores via {source}.")
    if physical:
        logger.debug(f"Detected {physical} physical cores via {source}.")

    return logical, physical
267
+
268
+ # --- Weighting Function ---
269
def _apply_hyperthread_weight(self, logical_limit: int) -> float:
    """
    Convert a logical core limit into a hyperthread-weighted core estimate.

    Cores up to the physical count are counted in full; any cores beyond
    it are treated as hyperthreads and scaled by ``self.hyperthread_weight``.

    Parameters
    ----------
    logical_limit : int
        The maximum number of logical cores allowed (e.g., from affinity or OS count).

    Returns
    -------
    float
        The weighted core estimate, or ``float(logical_limit)`` when the
        physical or logical core count needed for weighting is unavailable.
    """
    P = self.os_physical_cores

    # Weighting needs both the physical and the logical count.
    if P is None or not P > 0 or self.os_logical_cores is None:
        # Warn only when a weighting was requested and psutil is present
        # (a missing psutil was already reported during __init__).
        if self.hyperthread_weight != 1.0 and psutil:
            if P is None:
                logger.warning("Cannot apply hyperthread weight: Physical core count not available.")
            else:
                logger.warning("Cannot apply hyperthread weight: Logical core count not available.")

        logger.debug(f"Skipping hyperthread weight calculation for logical limit {logical_limit}.")
        return float(logical_limit)

    # Never count more logical cores than the system reports.
    N = min(logical_limit, self.os_logical_cores)
    full_cores = min(N, P)
    smt_cores = max(0, N - P)
    weighted_cores = (full_cores * 1.0) + (smt_cores * self.hyperthread_weight)

    if weighted_cores == N:
        logger.debug(
            f"Hyperthread weighting ({self.hyperthread_weight:.2f}) applied to "
            f"logical limit {logical_limit} (System: {P}P/{self.os_logical_cores}L), "
            f"but result is still {weighted_cores:.2f} (e.g., limit <= physical or weight=1.0)"
        )
    else:
        logger.debug(
            f"Applying hyperthread weight ({self.hyperthread_weight:.2f}) to "
            f"logical limit {logical_limit} (System: {P}P/{self.os_logical_cores}L): "
            f"Effective weighted cores = {weighted_cores:.2f}"
        )
    return weighted_cores
322
+
323
+ # --- Memory Detection Methods ---
324
@staticmethod
def _get_os_memory() -> Optional[int]:
    """
    Determine total system memory in bytes via psutil, else ``/proc/meminfo``.

    Returns
    -------
    Optional[int]
        Total memory in bytes, or None when neither source yields a value.
    """
    # Preferred source: psutil
    if psutil:
        try:
            total_bytes = psutil.virtual_memory().total
            if total_bytes and total_bytes > 0:
                logger.debug(f"Detected {total_bytes / (1024**3):.2f} GB system memory via psutil.")
                return total_bytes
        except Exception as e:
            logger.warning(f"psutil.virtual_memory() failed: {e}. Falling back to /proc/meminfo.")

    # Fallback source: the first MemTotal line of /proc/meminfo
    try:
        if os.path.exists("/proc/meminfo"):
            with open("/proc/meminfo", "r") as meminfo:
                mem_total_line = next((entry for entry in meminfo if entry.startswith("MemTotal:")), None)
            if mem_total_line is not None:
                fields = mem_total_line.split()
                if len(fields) >= 2:
                    # The value is treated as kilobytes and scaled to bytes.
                    total_bytes = int(fields[1]) * 1024
                    logger.debug(f"Detected {total_bytes / (1024**3):.2f} GB system memory via /proc/meminfo.")
                    return total_bytes
    except (IOError, ValueError, PermissionError) as e:
        logger.warning(f"Failed to read /proc/meminfo: {e}")

    logger.error("Could not determine system memory from any source.")
    return None
359
+
360
def _read_memory_cgroup_v2(self) -> bool:
    """
    Probe the Cgroup v2 ``memory.max`` file for a memory limit.

    On a numeric limit, stores it in ``cgroup_memory_limit_bytes`` and
    additionally tries to record ``memory.current`` in
    ``cgroup_memory_usage_bytes``. The literal ``max`` is treated as
    unlimited and leaves both fields untouched.

    Returns
    -------
    bool
        True when the file held ``max`` or a parseable integer; False
        when it is missing, empty, or unparseable.
    """
    if not os.path.exists(CGROUP_V2_MEMORY_FILE):
        logger.debug(f"Cgroup v2 memory.max file not found: {CGROUP_V2_MEMORY_FILE}")
        return False

    logger.debug(f"Checking Cgroup v2 memory limits in {CGROUP_V2_MEMORY_FILE}")
    content = self._read_file_str(CGROUP_V2_MEMORY_FILE)
    if not content:
        return False

    if content == "max":
        logger.debug("Cgroup v2 memory limit: unlimited")
        return True

    try:
        limit_bytes = int(content)
    except ValueError:
        logger.warning(f"Could not parse Cgroup v2 memory.max content: '{content}'")
        return False

    self.cgroup_memory_limit_bytes = limit_bytes
    logger.debug(f"Cgroup v2 memory limit: {limit_bytes / (1024**3):.2f} GB")

    # Current usage is best-effort: a parse failure is only logged.
    usage_content = self._read_file_str(CGROUP_V2_MEMORY_CURRENT)
    if usage_content:
        try:
            usage_bytes = int(usage_content)
        except ValueError:
            logger.debug(f"Could not parse memory.current: '{usage_content}'")
        else:
            self.cgroup_memory_usage_bytes = usage_bytes
            logger.debug(f"Cgroup v2 memory usage: {usage_bytes / (1024**3):.2f} GB")

    return True
392
+
393
def _read_memory_cgroup_v1(self) -> bool:
    """
    Probe the Cgroup v1 memory controller for a limit and current usage.

    Reads ``memory.limit_in_bytes`` and ``memory.usage_in_bytes``. A limit
    that is implausibly large is treated as "unlimited" and is not stored;
    otherwise the limit (and the usage, when readable) is recorded in
    ``cgroup_memory_limit_bytes`` / ``cgroup_memory_usage_bytes``.

    Returns
    -------
    bool
        True when ``memory.limit_in_bytes`` could be read (limited or
        unlimited); False when the directory or the limit file is missing
        or unreadable.
    """
    if not os.path.exists(CGROUP_V1_MEMORY_DIR):
        logger.debug(f"Cgroup v1 memory dir not found: {CGROUP_V1_MEMORY_DIR}")
        return False

    logger.debug(f"Checking Cgroup v1 memory limits in {CGROUP_V1_MEMORY_DIR}")

    limit_bytes = self._read_file_int(os.path.join(CGROUP_V1_MEMORY_DIR, "memory.limit_in_bytes"))
    usage_bytes = self._read_file_int(os.path.join(CGROUP_V1_MEMORY_DIR, "memory.usage_in_bytes"))

    if limit_bytes is None:
        return False

    # Cgroup v1 signals "no limit" with a huge sentinel. Kernels commonly
    # report LONG_MAX rounded DOWN to a page multiple (for example
    # 9223372036854771712 with 4 KiB pages), which is below 2^63-1, so an
    # exact ">= 2^63-1" test misses it. Any value of 2^62 bytes (4 EiB) or
    # more is therefore treated as unlimited. This keeps detection correct
    # even when the OS total is unknown and the "100x system memory" check
    # below cannot run.
    unlimited_floor = 1 << 62
    if limit_bytes >= unlimited_floor or (
        self.os_total_memory_bytes and limit_bytes >= self.os_total_memory_bytes * 100
    ):
        logger.debug("Cgroup v1 memory limit: unlimited (very large value)")
        return True

    self.cgroup_memory_limit_bytes = limit_bytes
    logger.debug(f"Cgroup v1 memory limit: {limit_bytes / (1024**3):.2f} GB")

    if usage_bytes is not None:
        self.cgroup_memory_usage_bytes = usage_bytes
        logger.debug(f"Cgroup v1 memory usage: {usage_bytes / (1024**3):.2f} GB")

    return True
424
+
425
def _detect_memory(self):
    """
    Run the memory detection sequence and record the effective limit.

    Order: OS total memory, then the Cgroup v2 limit, then the Cgroup v1
    limit if v2 was absent or produced no limit. A cgroup limit takes
    precedence over the OS total. Results are stored in
    ``effective_memory_bytes`` and ``memory_detection_method``
    ("cgroup_limited", "system_memory" or "failed").
    """
    logger.debug("Starting memory detection...")

    # 1. OS-level total
    self.os_total_memory_bytes = self._get_os_memory()

    # 2. Cgroup v2, then 3. Cgroup v1 when v2 gave no concrete limit
    v2_found = self._read_memory_cgroup_v2()
    if not (v2_found and self.cgroup_memory_limit_bytes is not None):
        self._read_memory_cgroup_v1()

    # 4. Pick the effective value
    if self.cgroup_memory_limit_bytes is not None:
        chosen, method, label = self.cgroup_memory_limit_bytes, "cgroup_limited", "cgroup limited"
    elif self.os_total_memory_bytes is not None:
        # No cgroup limit: fall back to the system total
        chosen, method, label = self.os_total_memory_bytes, "system_memory", "system memory"
    else:
        logger.error("Could not determine effective memory limit")
        self.memory_detection_method = "failed"
        return

    self.effective_memory_bytes = chosen
    self.memory_detection_method = method
    logger.debug(f"Effective memory: {self.effective_memory_bytes / (1024**3):.2f} GB ({label})")
452
+
453
def _detect(self):
    """Run all probes, choose the strictest raw CPU limit, then weight it.

    Order of operations:
      1. Read OS logical/physical core counts.
      2. Read cgroup v2; read cgroup v1 when v2 was not detected or was
         detected without a quota.
      3. Read the scheduler affinity core count and detect memory.
      4. Pick the raw limit: a positive cgroup quota, tightened by a
         positive scheduler affinity when that is smaller; otherwise the
         OS logical core count; otherwise 1.0.
      5. Apply hyperthread weighting, but only when the raw limit did not
         come from a cgroup quota and is (effectively) a whole number.

    Results are stored on the instance (``raw_limit_value``,
    ``raw_limit_method``, ``effective_cores``, ``detection_method``);
    nothing is returned.
    """
    logger.debug("Starting effective core count detection...")

    # OS-level counts are gathered before any container-level probing.
    self.os_logical_cores, self.os_physical_cores = self._get_os_cpu_counts()

    # Cgroup v2 is consulted first; v1 is read when v2 was absent or when
    # v2 was found but produced no quota.
    found_v2 = self._read_cgroup_v2()
    needs_v1 = not found_v2
    if not needs_v1:
        needs_v1 = self.cgroup_type == "v2" and self.cgroup_quota_cores is None
    if needs_v1:
        self._read_cgroup_v1()

    self.os_sched_affinity_cores = self._get_os_affinity()
    self._detect_memory()

    # --- Raw limit selection (no weighting yet) ---
    unlimited = float("inf")
    limit = unlimited
    method = "unknown"

    # A positive cgroup quota has first priority.
    if self.cgroup_quota_cores is not None and self.cgroup_quota_cores > 0:
        limit = min(limit, self.cgroup_quota_cores)
        method = f"cgroup_{self.cgroup_type}_quota"
        logger.debug(f"Raw limit set by Cgroup Quota: {self.cgroup_quota_cores:.2f}")

    # Scheduler affinity replaces the limit only when it is strictly smaller.
    if self.os_sched_affinity_cores is not None and self.os_sched_affinity_cores > 0:
        affinity_cap = float(self.os_sched_affinity_cores)
        if affinity_cap < limit:
            limit, method = affinity_cap, "sched_affinity"
            logger.debug(f"Raw limit updated by Sched Affinity: {affinity_cap}")
        elif method.startswith("cgroup"):
            logger.debug(f"Sched Affinity limit ({affinity_cap}) not stricter than Cgroup Quota ({limit:.2f}).")

    # Neither quota nor affinity applied: use the OS logical count, or 1.0
    # as the last resort.
    if limit == unlimited:
        has_logical_count = self.os_logical_cores is not None and self.os_logical_cores > 0
        if not has_logical_count:
            logger.warning("Could not determine any CPU core limit. Defaulting raw limit to 1.0.")
            limit, method = 1.0, "fallback_default"
        else:
            limit = float(self.os_logical_cores)
            method = "os_logical_count"
            logger.debug(f"Raw limit set by OS Logical Core count: {self.os_logical_cores}")

    self.raw_limit_value = limit
    self.raw_limit_method = method
    logger.debug(f"Raw CPU limit determined: {self.raw_limit_value:.2f} (Method: {self.raw_limit_method})")

    # --- Weighting ---
    effective = limit
    label = method

    if method.startswith("cgroup_"):
        # Cgroup quotas are taken as-is.
        logger.debug("Raw limit is from Cgroup quota. Using quota value directly (skipping SMT weighting).")
    elif abs(limit - round(limit)) < 1e-9 and limit > 0:
        weighted = self._apply_hyperthread_weight(int(round(limit)))
        effective = weighted
        # The method name gains a suffix only when weighting changed the value.
        if abs(weighted - limit) > 1e-9:
            label = f"{method}_weighted"
    else:
        # Non-cgroup limit that is not a whole number; left unweighted.
        logger.debug(
            f"Raw limit method '{method}' is not cgroup quota, "
            f"but value {limit:.2f} is not integer. Skipping weighting."
        )

    self.effective_cores = effective
    self.detection_method = label  # describes how the final value was obtained

    logger.debug(f"Effective CPU core limit determined: {self.effective_cores:.2f} (Method: {self.detection_method})")
545
+
546
def get_effective_cores(self) -> Optional[float]:
    """Return the stored effective core limit (``self.effective_cores``).

    The value is whatever detection last stored; it may already include
    hyperthread weighting.
    """
    cores = self.effective_cores
    return cores
549
+
550
@property
def total_memory_mb(self) -> Optional[float]:
    """Effective memory limit in megabytes, or ``None`` when no limit is stored."""
    limit_bytes = self.effective_memory_bytes
    if limit_bytes is None:
        return None
    bytes_per_mb = 1024 * 1024
    return limit_bytes / bytes_per_mb
556
+
557
@property
def cpu_count(self) -> Optional[float]:
    """Compatibility alias that exposes ``self.effective_cores`` unchanged."""
    cores = self.effective_cores
    return cores
561
+
562
def get_details(self) -> Dict[str, Any]:
    """Collect every detected CPU and memory value into one dictionary.

    Besides the stored attributes, the result carries
    ``os_weighted_potential``: the hyperthread-weighted value of the full
    OS logical core count, computed only when both the physical and the
    logical core counts are truthy (otherwise ``None``). The ``platform``
    entry is the value of ``platform.system()``.
    """
    # Informational only: weighting applied to the whole machine's logical cores.
    weighted_potential = (
        self._apply_hyperthread_weight(self.os_logical_cores)
        if self.os_physical_cores and self.os_logical_cores
        else None
    )

    cpu_section = {
        "effective_cores": self.effective_cores,
        "detection_method": self.detection_method,
        "raw_limit_value": self.raw_limit_value,
        "raw_limit_method": self.raw_limit_method,
        "hyperthread_weight_applied": self.hyperthread_weight,
        "os_logical_cores": self.os_logical_cores,
        "os_physical_cores": self.os_physical_cores,
        "os_weighted_potential": weighted_potential,
        "os_sched_affinity_cores": self.os_sched_affinity_cores,
        "cgroup_type": self.cgroup_type,
        "cgroup_quota_cores": self.cgroup_quota_cores,
        "cgroup_period_us": self.cgroup_period_us,
        "cgroup_shares": self.cgroup_shares,
        "cgroup_usage_total_us": self.cgroup_usage_total_us,
        "cgroup_usage_percpu_us": self.cgroup_usage_percpu_us,
    }

    memory_section = {
        "effective_memory_bytes": self.effective_memory_bytes,
        "effective_memory_mb": self.total_memory_mb,
        "memory_detection_method": self.memory_detection_method,
        "os_total_memory_bytes": self.os_total_memory_bytes,
        "cgroup_memory_limit_bytes": self.cgroup_memory_limit_bytes,
        "cgroup_memory_usage_bytes": self.cgroup_memory_usage_bytes,
    }

    # Key order is CPU fields, then memory fields, then the platform name.
    return {**cpu_section, **memory_section, "platform": platform.system()}