code-loader 1.0.94.dev11__py3-none-any.whl → 1.0.153.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of code-loader might be problematic. Click here for more details.

code_loader/leaploader.py CHANGED
@@ -2,6 +2,7 @@
2
2
  import importlib.util
3
3
  import inspect
4
4
  import io
5
+ import os
5
6
  import sys
6
7
  from contextlib import redirect_stdout
7
8
  from functools import lru_cache
@@ -15,7 +16,7 @@ from code_loader.contract.datasetclasses import DatasetSample, DatasetBaseHandle
15
16
  PreprocessResponse, VisualizerHandler, LeapData, \
16
17
  PredictionTypeHandler, MetadataHandler, CustomLayerHandler, MetricHandler, VisualizerHandlerData, MetricHandlerData, \
17
18
  MetricCallableReturnType, CustomLossHandlerData, CustomLossHandler, RawInputsForHeatmap, SamplePreprocessResponse, \
18
- ElementInstance
19
+ ElementInstance, custom_latent_space_attribute
19
20
  from code_loader.contract.enums import DataStateEnum, TestingSectionEnum, DataStateType, DatasetMetadataType
20
21
  from code_loader.contract.exceptions import DatasetScriptException
21
22
  from code_loader.contract.responsedataclasses import DatasetIntegParseResult, DatasetTestResultPayload, \
@@ -23,6 +24,7 @@ from code_loader.contract.responsedataclasses import DatasetIntegParseResult, Da
23
24
  VisualizerInstance, PredictionTypeInstance, ModelSetup, CustomLayerInstance, MetricInstance, CustomLossInstance, \
24
25
  EngineFileContract
25
26
  from code_loader.inner_leap_binder import global_leap_binder
27
+ from code_loader.inner_leap_binder.leapbinder import mapping_runtime_mode_env_var_mame
26
28
  from code_loader.leaploaderbase import LeapLoaderBase
27
29
  from code_loader.utils import get_root_exception_file_and_line_number
28
30
 
@@ -33,10 +35,23 @@ class LeapLoader(LeapLoaderBase):
33
35
 
34
36
  self._preprocess_result_cached = None
35
37
 
38
+ try:
39
+ from code_loader.mixpanel_tracker import track_code_loader_loaded
40
+ track_code_loader_loaded({
41
+ 'event_type': 'leap_loader_instantiated',
42
+ 'code_path': code_path,
43
+ 'code_entry_name': code_entry_name
44
+ })
45
+ except Exception:
46
+ pass
47
+
36
48
  @lru_cache()
37
49
  def exec_script(self) -> None:
38
50
  try:
51
+ os.environ[mapping_runtime_mode_env_var_mame] = 'TRUE'
39
52
  self.evaluate_module()
53
+ if global_leap_binder.integration_test_func is not None:
54
+ global_leap_binder.integration_test_func(None, PreprocessResponse(state=DataStateType.training, length=0))
40
55
  except TypeError as e:
41
56
  import traceback
42
57
  if "leap_binder.set_metadata(" in traceback.format_exc(5):
@@ -45,6 +60,10 @@ class LeapLoader(LeapLoaderBase):
45
60
  raise DatasetScriptException(getattr(e, 'message', repr(e))) from e
46
61
  except Exception as e:
47
62
  raise DatasetScriptException(getattr(e, 'message', repr(e))) from e
63
+ finally:
64
+ # ensure that the environment variable is removed after the script execution
65
+ if mapping_runtime_mode_env_var_mame in os.environ:
66
+ del os.environ[mapping_runtime_mode_env_var_mame]
48
67
 
49
68
  def evaluate_module(self) -> None:
50
69
  def append_path_recursively(full_path: str) -> None:
@@ -58,13 +77,15 @@ class LeapLoader(LeapLoaderBase):
58
77
  file_path = Path(self.code_path, self.code_entry_name)
59
78
  append_path_recursively(str(file_path))
60
79
 
80
+ importlib.invalidate_caches()
81
+
61
82
  spec = importlib.util.spec_from_file_location(self.code_path, file_path)
62
83
  if spec is None or spec.loader is None:
63
- raise DatasetScriptException(f'Something is went wrong with spec file from: {file_path}')
84
+ raise DatasetScriptException(f'Something went wrong with spec file from: {file_path}')
64
85
 
65
86
  file = importlib.util.module_from_spec(spec)
66
87
  if file is None:
67
- raise DatasetScriptException(f'Something is went wrong with import module from: {file_path}')
88
+ raise DatasetScriptException(f'Something went wrong with import module from: {file_path}')
68
89
 
69
90
  spec.loader.exec_module(file)
70
91
 
@@ -136,35 +157,28 @@ class LeapLoader(LeapLoaderBase):
136
157
  for prediction_type in setup.prediction_types
137
158
  }
138
159
 
139
- def get_sample(self, state: DataStateEnum, sample_id: Union[int, str]) -> DatasetSample:
160
+ def get_sample(self, state: DataStateEnum, sample_id: Union[int, str], instance_id: int = None) -> DatasetSample:
140
161
  self.exec_script()
141
162
  preprocess_result = self._preprocess_result()
142
163
  if state == DataStateEnum.unlabeled and sample_id not in preprocess_result[state].sample_ids:
143
164
  self._preprocess_result(update_unlabeled_preprocess=True)
144
165
 
145
- metadata, metadata_is_none = self._get_metadata(state, sample_id)
146
- sample = DatasetSample(inputs=self._get_inputs(state, sample_id),
147
- gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, sample_id),
148
- metadata=metadata,
149
- metadata_is_none=metadata_is_none,
150
- index=sample_id,
151
- state=state)
152
- return sample
153
-
154
- def get_sample_with_masks(self, state: DataStateEnum, sample_id: Union[int, str]) -> DatasetSample:
155
- self.exec_script()
156
- preprocess_result = self._preprocess_result()
157
- if state == DataStateEnum.unlabeled and sample_id not in preprocess_result[state].sample_ids:
158
- self._preprocess_result(update_unlabeled_preprocess=True)
166
+ metadata, metadata_is_none = self.get_metadata(state, sample_id)
159
167
 
160
- metadata, metadata_is_none = self._get_metadata(state, sample_id)
168
+ custom_latent_space = None
169
+ if global_leap_binder.setup_container.custom_latent_space is not None:
170
+ custom_latent_space = global_leap_binder.setup_container.custom_latent_space.function(sample_id,
171
+ preprocess_result[
172
+ state])
173
+ instance_mask = self._get_instances_masks(state, sample_id, instance_id)
161
174
  sample = DatasetSample(inputs=self._get_inputs(state, sample_id),
162
175
  gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, sample_id),
163
176
  metadata=metadata,
164
177
  metadata_is_none=metadata_is_none,
165
178
  index=sample_id,
166
179
  state=state,
167
- instance_masks=self._get_instances_masks(state, sample_id))
180
+ custom_latent_space=custom_latent_space,
181
+ instance_masks=instance_mask)
168
182
  return sample
169
183
 
170
184
  def check_dataset(self) -> DatasetIntegParseResult:
@@ -175,6 +189,7 @@ class LeapLoader(LeapLoaderBase):
175
189
  with redirect_stdout(stdout_steam):
176
190
  try:
177
191
  self.exec_script()
192
+
178
193
  preprocess_test_payload = self._check_preprocess()
179
194
  test_payloads.append(preprocess_test_payload)
180
195
  handlers_test_payloads = self._check_handlers()
@@ -454,12 +469,14 @@ class LeapLoader(LeapLoaderBase):
454
469
  def _get_inputs(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
455
470
  return self._get_dataset_handlers(global_leap_binder.setup_container.inputs, state, sample_id)
456
471
 
457
- def _get_instances_masks(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, List[ElementInstance]]:
472
+ def _get_instances_masks(self, state: DataStateEnum, sample_id: Union[int, str], instance_id: int) -> Optional[Dict[str, ElementInstance]]:
473
+ if instance_id is None:
474
+ return None
458
475
  preprocess_result = self._preprocess_result()
459
476
  preprocess_state = preprocess_result[state]
460
477
  result_agg = {}
461
478
  for handler in global_leap_binder.setup_container.instance_masks:
462
- handler_result = handler.function(sample_id, preprocess_state)
479
+ handler_result = handler.function(sample_id, preprocess_state, instance_id)
463
480
  handler_name = handler.name
464
481
  result_agg[handler_name] = handler_result
465
482
  return result_agg
@@ -504,20 +521,38 @@ class LeapLoader(LeapLoaderBase):
504
521
 
505
522
  return converted_value, is_none
506
523
 
507
- def _get_metadata(self, state: DataStateEnum, sample_id: Union[int, str]) -> Tuple[Dict[str, Union[str, int, bool, float]], Dict[str, bool]]:
524
+ def get_metadata(self, state: DataStateEnum, sample_id: Union[int, str], requested_metadata_names: Optional[List[str]] = None) -> Tuple[
525
+ Dict[str, Union[str, int, bool, float]], Dict[str, bool]]:
526
+
527
+ def is_metadata_name_starts_with_handler_name(_handler):
528
+ for metadata_name in requested_metadata_names:
529
+ if metadata_name.startswith(_handler.name + '_') or metadata_name == _handler.name:
530
+ return True
531
+ return False
532
+
508
533
  result_agg = {}
509
534
  is_none = {}
510
535
  preprocess_result = self._preprocess_result()
511
536
  preprocess_state = preprocess_result[state]
512
537
  for handler in global_leap_binder.setup_container.metadata:
538
+ if requested_metadata_names:
539
+ if not is_metadata_name_starts_with_handler_name(handler):
540
+ continue
541
+
513
542
  handler_result = handler.function(sample_id, preprocess_state)
514
543
  if isinstance(handler_result, dict):
515
544
  for single_metadata_name, single_metadata_result in handler_result.items():
516
545
  handler_name = f'{handler.name}_{single_metadata_name}'
546
+ if requested_metadata_names:
547
+ if handler_name not in requested_metadata_names:
548
+ continue
517
549
  result_agg[handler_name], is_none[handler_name] = self._convert_metadata_to_correct_type(
518
550
  handler_name, single_metadata_result)
519
551
  else:
520
552
  handler_name = handler.name
553
+ if requested_metadata_names:
554
+ if handler_name not in requested_metadata_names:
555
+ continue
521
556
  result_agg[handler_name], is_none[handler_name] = self._convert_metadata_to_correct_type(
522
557
  handler_name, handler_result)
523
558
 
@@ -533,7 +568,12 @@ class LeapLoader(LeapLoaderBase):
533
568
 
534
569
  return id_type
535
570
 
536
- def get_instances_data(self, state: DataStateEnum) -> Tuple[Dict[Union[int, str], List[Union[int, str]]], Dict[Union[int, str], Union[int, str]], List[Union[int, str]]]:
571
+ @lru_cache()
572
+ def has_custom_latent_space_decorator(self) -> bool:
573
+ self.exec_script()
574
+ return global_leap_binder.setup_container.custom_latent_space is not None
575
+
576
+ def get_instances_data(self, state: DataStateEnum) -> Tuple[Dict[str, List[str]], Dict[str, str]]:
537
577
  """
538
578
  This Method get the data state and returns two dictionaries that holds the mapping of the sample ids to their
539
579
  instances and the other way around and the sample ids array.
@@ -546,4 +586,4 @@ class LeapLoader(LeapLoaderBase):
546
586
  """
547
587
  preprocess_result = self._preprocess_result()
548
588
  preprocess_state = preprocess_result[state]
549
- return preprocess_state.sample_ids_to_instance_mappings, preprocess_state.instance_to_sample_ids_mappings, preprocess_state.sample_ids
589
+ return preprocess_state.sample_ids_to_instance_mappings, preprocess_state.instance_to_sample_ids_mappings
@@ -61,16 +61,30 @@ class LeapLoaderBase:
61
61
  pass
62
62
 
63
63
  @abstractmethod
64
- def get_sample(self, state: DataStateEnum, sample_id: Union[int, str]) -> DatasetSample:
64
+ def get_sample(self, state: DataStateEnum, sample_id: Union[int, str], instance_id: int = None) -> DatasetSample:
65
65
  pass
66
66
 
67
67
  @abstractmethod
68
- def get_sample_with_masks(self, state: DataStateEnum, sample_id: Union[int, str]) -> DatasetSample:
68
+ def get_instances_data(self, state: DataStateEnum) -> Tuple[Dict[str, List[str]], Dict[str, str]]:
69
69
  pass
70
70
 
71
- @abstractmethod
72
- def get_instances_data(self, state: DataStateEnum) -> Tuple[Dict[Union[int, str], List[Union[int, str]]], Dict[Union[int, str], Union[int, str]], List[Union[int, str]]]:
73
- pass
71
+ def get_metadata_multiple_samples(self, state: DataStateEnum, sample_ids: Union[List[int], List[str]],
72
+ requested_metadata_names: Optional[List[str]] = None
73
+ ) -> Tuple[Dict[str, Union[List[str], List[int], List[bool],
74
+ List[float]]], Dict[str, List[bool]]]:
75
+ aggregated_results: Dict[str, List[Union[str, int, bool, float]]] = {}
76
+ aggregated_is_none: Dict[str, List[bool]] = {}
77
+ sample_id_type = self.get_sample_id_type()
78
+ for sample_id in sample_ids:
79
+ sample_id = sample_id_type(sample_id)
80
+ metadata_result, is_none_result = self.get_metadata(state, sample_id, requested_metadata_names)
81
+ for metadata_name, metadata_value in metadata_result.items():
82
+ if metadata_name not in aggregated_results:
83
+ aggregated_results[metadata_name] = []
84
+ aggregated_is_none[metadata_name] = []
85
+ aggregated_results[metadata_name].append(metadata_value)
86
+ aggregated_is_none[metadata_name].append(is_none_result[metadata_name])
87
+ return aggregated_results, aggregated_is_none
74
88
 
75
89
  @abstractmethod
76
90
  def check_dataset(self) -> DatasetIntegParseResult:
@@ -91,6 +105,13 @@ class LeapLoaderBase:
91
105
  input_tensors_by_arg_name: Dict[str, npt.NDArray[np.float32]]):
92
106
  pass
93
107
 
108
+ @abstractmethod
109
+ def get_metadata(
110
+ self, state: DataStateEnum, sample_id: Union[int, str],
111
+ requested_metadata_names: Optional[List[str]] = None
112
+ ) -> Tuple[Dict[str, Union[str, int, bool, float]], Dict[str, bool]]:
113
+ pass
114
+
94
115
  @abstractmethod
95
116
  def run_heatmap_visualizer(self, visualizer_name: str, sample_ids: np.array, state: DataStateEnum,
96
117
  input_tensors_by_arg_name: Dict[str, npt.NDArray[np.float32]]
@@ -114,6 +135,10 @@ class LeapLoaderBase:
114
135
  def get_sample_id_type(self) -> Type:
115
136
  pass
116
137
 
138
+ @abstractmethod
139
+ def has_custom_latent_space_decorator(self) -> bool:
140
+ pass
141
+
117
142
  @abstractmethod
118
143
  def get_heatmap_visualizer_raw_vis_input_arg_name(self, visualizer_name: str) -> Optional[str]:
119
144
  pass
@@ -0,0 +1,230 @@
1
+ """
2
+ Mixpanel tracking utilities for code-loader.
3
+ """
4
+ import os
5
+ import sys
6
+ import getpass
7
+ import uuid
8
+ import logging
9
+ from enum import Enum
10
+ from typing import Optional, Dict, Any, Set, Union, TypedDict
11
+ import mixpanel # type: ignore[import]
12
+
13
# Module-scoped logger; tracking failures in this module are reported at DEBUG level.
logger = logging.getLogger(__name__)

# Sent as the 'tracking_version' property on every tracked event.
TRACKING_VERSION = '1'
16
+
17
+
18
class AnalyticsEvent(str, Enum):
    """Enumeration of all tracked analytics events.

    Members are ``str`` instances equal to their value. ``__str__`` is
    overridden because the default ``Enum.__str__`` yields
    ``'AnalyticsEvent.<NAME>'``, which is not the event name that should be
    reported when a member is passed through ``str()``.
    """
    CODE_LOADER_LOADED = "code_loader_loaded"
    LOAD_MODEL_INTEGRATION_TEST = "load_model_integration_test"
    PREPROCESS_INTEGRATION_TEST = "preprocess_integration_test"
    INPUT_ENCODER_INTEGRATION_TEST = "input_encoder_integration_test"
    GT_ENCODER_INTEGRATION_TEST = "gt_encoder_integration_test"

    def __str__(self) -> str:
        """Return the event's wire name (its value), e.g. ``'code_loader_loaded'``."""
        return str(self.value)
25
+
26
+
27
class CodeLoaderLoadedProps(TypedDict, total=False):
    """Properties for code_loader_loaded event."""
    # total=False: every key is optional.
    # Sub-type of the event; LeapLoader.__init__ sends 'leap_loader_instantiated'.
    event_type: str
    # The code_path the LeapLoader was constructed with.
    code_path: str
    # The code_entry_name the LeapLoader was constructed with.
    code_entry_name: str
32
+
33
+
34
class LoadModelEventProps(TypedDict, total=False):
    """Properties for load_model_integration_test event."""
    # total=False: the key is optional.
    # Presumably the number of prediction types seen by the test; producer not visible here.
    prediction_types_count: int
37
+
38
+
39
class PreprocessEventProps(TypedDict, total=False):
    """Properties for preprocess_integration_test event."""
    # total=False: the key is optional.
    # Presumably the number of preprocess responses returned; producer not visible here.
    preprocess_responses_count: int
42
+
43
+
44
class InputEncoderEventProps(TypedDict, total=False):
    """Properties for input_encoder_integration_test event."""
    # total=False: every key is optional.
    # Presumably the registered name of the input encoder; producer not visible here.
    encoder_name: str
    # NOTE(review): looks like the encoder's channel dimension index; confirm with the emitter.
    channel_dim: int
48
+
49
+
50
class GtEncoderEventProps(TypedDict, total=False):
    """Properties for gt_encoder_integration_test event."""
    # total=False: the key is optional.
    # Presumably the registered name of the ground-truth encoder; producer not visible here.
    encoder_name: str
53
+
54
+
55
class MixpanelTracker:
    """Handles Mixpanel event tracking for code-loader.

    Tracking is best-effort: failures while resolving identifiers or sending
    an event are logged at DEBUG level and never propagate to the caller.
    """

    def __init__(self, token: str = "0c1710c9656bbfb1056bb46093e23ca1"):
        """Create the Mixpanel client.

        Args:
            token: Mixpanel project token; defaults to the hard-coded project token.
        """
        self.token = token
        self.mp = mixpanel.Mixpanel(token)
        # Username cache, filled by _get_whoami() on first use.
        self._user_id: Optional[str] = None
        # Device ID cache, filled by _get_or_create_device_id() on first use.
        self._device_id: Optional[str] = None

    def _get_whoami(self) -> str:
        """Get the current system username (whoami) for device identification.

        Returns:
            str: The system username, with fallbacks to the USER / USERNAME
            environment variables or 'unknown'.
        """
        if self._user_id is None:
            try:
                self._user_id = getpass.getuser()
            except Exception as e:
                logger.debug("Failed to get username via getpass: %s", e)
                # Fallback to environment variables or default
                self._user_id = os.environ.get('USER', os.environ.get('USERNAME', 'unknown'))
        return self._user_id or 'unknown'

    def _get_tensorleap_user_id(self) -> Optional[str]:
        """Get the TensorLeap user ID from ~/.tensorleap/user_id if it exists.

        Returns:
            The stripped file content, or None when the file is missing,
            empty, or unreadable.
        """
        try:
            user_id_path = os.path.expanduser("~/.tensorleap/user_id")
            if os.path.exists(user_id_path):
                with open(user_id_path, 'r') as f:
                    user_id = f.read().strip()
                    if user_id:
                        return user_id
        except Exception as e:
            logger.debug("Failed to read TensorLeap user ID: %s", e)
        return None

    def _get_or_create_device_id(self) -> str:
        """Get or create a device ID from ~/.tensorleap/device_id file.

        If the file doesn't exist (or is empty), creates it with a new UUID.
        The resolved ID is cached on the instance so that, when the file
        cannot be read or written, the random fallback stays the same for
        every event sent by this tracker instead of changing on each call.

        Returns:
            str: The device ID (UUID string)
        """
        cached = getattr(self, '_device_id', None)
        if cached:
            return cached

        try:
            device_id_path = os.path.expanduser("~/.tensorleap/device_id")

            # Create directory if it doesn't exist
            os.makedirs(os.path.dirname(device_id_path), exist_ok=True)

            device_id = ''
            if os.path.exists(device_id_path):
                with open(device_id_path, 'r') as f:
                    device_id = f.read().strip()

            if not device_id:
                # Generate new device ID and save it
                device_id = str(uuid.uuid4())
                with open(device_id_path, 'w') as f:
                    f.write(device_id)
        except Exception as e:
            logger.debug("Failed to read/write device ID file: %s", e)
            # Fallback to generating a new UUID if file operations fail
            device_id = str(uuid.uuid4())

        self._device_id = device_id
        return device_id

    def _get_distinct_id(self) -> str:
        """Get the distinct ID for Mixpanel tracking.

        Priority order:
        1. TensorLeap user ID (from ~/.tensorleap/user_id)
        2. Device ID (from ~/.tensorleap/device_id, generated if not exists)
        """
        tensorleap_user_id = self._get_tensorleap_user_id()
        if tensorleap_user_id:
            return tensorleap_user_id

        return self._get_or_create_device_id()

    @staticmethod
    def _event_name_to_str(event_name: Union[str, Enum]) -> str:
        """Return the wire name of an event.

        ``str()`` on a ``(str, Enum)`` member gives ``'ClassName.MEMBER'``
        unless the enum overrides ``__str__``, so enum members are converted
        through their value instead.
        """
        if isinstance(event_name, Enum):
            return str(event_name.value)
        return str(event_name)

    def _track_event(self, event_name: Union[str, AnalyticsEvent], event_properties: Optional[Dict[str, Any]] = None) -> None:
        """Internal method to track any event with device identification.

        Args:
            event_name: The name of the event to track (string or AnalyticsEvent enum)
            event_properties: Optional additional properties to include in the event;
                they override the built-in properties on key collisions.
        """
        # Skip tracking if IS_TENSORLEAP_PLATFORM environment variable is set to 'true'
        if os.environ.get('IS_TENSORLEAP_PLATFORM') == 'true':
            return

        try:
            # Resolve each identifier once so distinct_id and $device_id cannot disagree.
            tensorleap_user_id = self._get_tensorleap_user_id()
            device_id = self._get_or_create_device_id()
            distinct_id = tensorleap_user_id or device_id
            whoami = self._get_whoami()

            properties: Dict[str, Any] = {
                'tracking_version': TRACKING_VERSION,
                'service': 'code-loader',
                'whoami': whoami,
                '$device_id': device_id,  # Always use device_id for $device_id
                'python_version': f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
                'platform': os.name,
            }

            if tensorleap_user_id:
                properties['user_id'] = tensorleap_user_id

            if event_properties:
                properties.update(event_properties)

            self.mp.track(distinct_id, self._event_name_to_str(event_name), properties)
        except Exception as e:
            logger.debug("Failed to track event '%s': %s", event_name, e)

    def track_code_loader_loaded(self, event_properties: Optional[Dict[str, Any]] = None) -> None:
        """Track code loader loaded event with device identification.

        Args:
            event_properties: Optional additional properties to include in the event
        """
        self._track_event(AnalyticsEvent.CODE_LOADER_LOADED, event_properties)

    def track_integration_test_event(self, event_name: Union[str, AnalyticsEvent], event_properties: Optional[Dict[str, Any]] = None) -> None:
        """Track an integration test event with device identification.

        Args:
            event_name: The name of the event to track (string or AnalyticsEvent enum)
            event_properties: Optional additional properties to include in the event
        """
        self._track_event(event_name, event_properties)
189
+
190
+
191
# Process-wide tracker, built lazily by get_tracker().
_tracker = None


def get_tracker() -> MixpanelTracker:
    """Return the shared MixpanelTracker, constructing it on first call."""
    global _tracker
    if _tracker is not None:
        return _tracker
    _tracker = MixpanelTracker()
    return _tracker
200
+
201
+
202
def track_code_loader_loaded(event_properties: Optional[Dict[str, Any]] = None) -> None:
    """Report the code-loader-loaded event through the shared tracker.

    Args:
        event_properties: Optional additional properties to include in the event
    """
    shared_tracker = get_tracker()
    shared_tracker.track_code_loader_loaded(event_properties)
204
+
205
+
206
def track_integration_test_event(event_name: Union[str, AnalyticsEvent], event_properties: Optional[Dict[str, Any]] = None) -> None:
    """Report an integration test event through the shared tracker.

    Args:
        event_name: The name of the event to track (string or AnalyticsEvent enum)
        event_properties: Optional additional properties to include in the event
    """
    shared_tracker = get_tracker()
    shared_tracker.track_integration_test_event(event_name, event_properties)
208
+
209
+
210
# Module-level set to track which integration test events have been emitted
_integration_events_emitted: Set[str] = set()


def emit_integration_event_once(event_name: Union[str, AnalyticsEvent], props: Dict[str, Any]) -> None:
    """Emit an integration test event only once per test run.

    Args:
        event_name: The event to emit (string or AnalyticsEvent enum)
        props: Properties to attach to the event
    """
    # Key on the enum's value so that an AnalyticsEvent member and its
    # equivalent raw string are recognised as the same event.
    if isinstance(event_name, Enum):
        event_key = str(event_name.value)
    else:
        event_key = str(event_name)

    if event_key in _integration_events_emitted:
        return

    try:
        track_integration_test_event(event_name, props)
        _integration_events_emitted.add(event_key)
    except Exception as e:
        logger.debug("Failed to emit integration event once '%s': %s", event_key, e)
225
+
226
+
227
def clear_integration_events() -> None:
    """Clear the integration events set for a new test run."""
    # The set is emptied in place, not rebound, so no `global` declaration is needed.
    _integration_events_emitted.clear()
File without changes