label-studio-sdk 0.0.32__py3-none-any.whl → 0.0.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of label-studio-sdk might be problematic. Click here for more details.

Files changed (38) hide show
  1. label_studio_sdk/__init__.py +4 -1
  2. label_studio_sdk/client.py +94 -78
  3. label_studio_sdk/data_manager.py +32 -23
  4. label_studio_sdk/exceptions.py +10 -0
  5. label_studio_sdk/label_interface/__init__.py +1 -0
  6. label_studio_sdk/label_interface/base.py +77 -0
  7. label_studio_sdk/label_interface/control_tags.py +756 -0
  8. label_studio_sdk/label_interface/interface.py +922 -0
  9. label_studio_sdk/label_interface/label_tags.py +72 -0
  10. label_studio_sdk/label_interface/object_tags.py +292 -0
  11. label_studio_sdk/label_interface/region.py +43 -0
  12. label_studio_sdk/objects.py +35 -0
  13. label_studio_sdk/project.py +711 -258
  14. label_studio_sdk/schema/label_config_schema.json +226 -0
  15. label_studio_sdk/users.py +15 -13
  16. label_studio_sdk/utils.py +31 -30
  17. label_studio_sdk/workspaces.py +13 -11
  18. {label_studio_sdk-0.0.32.dist-info → label_studio_sdk-0.0.34.dist-info}/METADATA +3 -1
  19. label_studio_sdk-0.0.34.dist-info/RECORD +37 -0
  20. {label_studio_sdk-0.0.32.dist-info → label_studio_sdk-0.0.34.dist-info}/WHEEL +1 -1
  21. {label_studio_sdk-0.0.32.dist-info → label_studio_sdk-0.0.34.dist-info}/top_level.txt +0 -1
  22. tests/test_client.py +21 -10
  23. tests/test_export.py +105 -0
  24. tests/test_interface/__init__.py +1 -0
  25. tests/test_interface/configs.py +137 -0
  26. tests/test_interface/mockups.py +22 -0
  27. tests/test_interface/test_compat.py +64 -0
  28. tests/test_interface/test_control_tags.py +55 -0
  29. tests/test_interface/test_data_generation.py +45 -0
  30. tests/test_interface/test_lpi.py +15 -0
  31. tests/test_interface/test_main.py +196 -0
  32. tests/test_interface/test_object_tags.py +36 -0
  33. tests/test_interface/test_region.py +36 -0
  34. tests/test_interface/test_validate_summary.py +35 -0
  35. tests/test_interface/test_validation.py +59 -0
  36. docs/__init__.py +0 -3
  37. label_studio_sdk-0.0.32.dist-info/RECORD +0 -15
  38. {label_studio_sdk-0.0.32.dist-info → label_studio_sdk-0.0.34.dist-info}/LICENSE +0 -0
@@ -0,0 +1,922 @@
1
+ """
2
+ """
3
+
4
+ import os
5
+ import copy
6
+ import logging
7
+ import re
8
+ import json
9
+ import jsonschema
10
+
11
+ from typing import Dict, Optional, List, Tuple, Any, Callable, Union
12
+ from pydantic import BaseModel
13
+
14
+ # from typing import Dict, Optional, List, Tuple, Any
15
+ from collections import defaultdict
16
+ from lxml import etree
17
+ import xmljson
18
+
19
+ from label_studio_sdk.exceptions import (
20
+ LSConfigParseException,
21
+ LabelStudioXMLSyntaxErrorSentryIgnored,
22
+ LabelStudioValidationErrorSentryIgnored,
23
+ )
24
+
25
+ from label_studio_sdk.label_interface.control_tags import (
26
+ ControlTag,
27
+ ChoicesTag,
28
+ LabelsTag,
29
+ )
30
+ from label_studio_sdk.label_interface.object_tags import ObjectTag
31
+ from label_studio_sdk.label_interface.label_tags import LabelTag
32
+ from label_studio_sdk.objects import AnnotationValue, TaskValue, PredictionValue
33
+
34
+
35
# Tag names that are treated as label tags.
_LABEL_TAGS = {"Label", "Choice", "Relation"}

_DIR_APP_NAME = "label-studio"

# Lower-cased tag names that mark a config as video object tracking.
_VIDEO_TRACKING_TAGS = {"videorectangle"}

# Key under which annotations and predictions keep their list of regions.
RESULT_KEY = "result"

# The JSON schema for labeling configs lives in the sibling "schema"
# directory, one level above the directory containing this module.
dir_path = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(dir_path, "..", "schema", "label_config_schema.json")

# Loaded once at import time and reused by every schema validation.
with open(file_path) as f:
    _LABEL_CONFIG_SCHEMA_DATA = json.load(f)
47
+
48
+ ############ core/label_config.py
49
+
50
+
51
def merge_labels_counters(dict1, dict2):
    """Merge two label-counter dictionaries, summing the counts of shared labels.

    Both arguments map a control tag name to a ``{label: count}`` dictionary.
    Neither input is modified; the nested dictionaries of the result are
    fresh objects.

    Args:
        dict1 (dict): The first dictionary to merge.
        dict2 (dict): The second dictionary to merge.

    Returns:
        dict: A new dictionary containing every key of both inputs, where the
        counts of labels present in both inputs are added together.

    Example:
        dict1 = {'sentiment': {'Negative': 1, 'Positive': 1}}
        dict2 = {'sentiment': {'Positive': 2, 'Neutral': 1}}
        result_dict = merge_labels_counters(dict1, dict2)
        # {'sentiment': {'Negative': 1, 'Positive': 3, 'Neutral': 1}}
    """
    merged = {}

    for name in set(dict1) | set(dict2):
        # start from a copy of the first counter (if any) ...
        counters = dict(dict1[name]) if name in dict1 else {}
        # ... and add the counts of the second one on top of it
        for label, count in dict2.get(name, {}).items():
            counters[label] = counters.get(label, 0) + count
        merged[name] = counters

    return merged
83
+
84
+
85
+ def _fix_choices(config):
86
+ """
87
+ workaround for single choice
88
+ https://github.com/heartexlabs/label-studio/issues/1259
89
+ """
90
+ if "Choices" in config:
91
+ # for single Choices tag in View
92
+ if "Choice" in config["Choices"] and not isinstance(
93
+ config["Choices"]["Choice"], list
94
+ ):
95
+ config["Choices"]["Choice"] = [config["Choices"]["Choice"]]
96
+ # for several Choices tags in View
97
+ elif isinstance(config["Choices"], list) and all(
98
+ "Choice" in tag_choices for tag_choices in config["Choices"]
99
+ ):
100
+ for n in range(len(config["Choices"])):
101
+ # check that Choices tag has only 1 choice
102
+ if not isinstance(config["Choices"][n]["Choice"], list):
103
+ config["Choices"][n]["Choice"] = [config["Choices"][n]["Choice"]]
104
+ if "View" in config:
105
+ if isinstance(config["View"], OrderedDict):
106
+ config["View"] = _fix_choices(config["View"])
107
+ else:
108
+ config["View"] = [_fix_choices(view) for view in config["View"]]
109
+ return config
110
+
111
+
112
def get_annotation_tuple(from_name, to_name, type):
    """Build the "from_name|to_name|type" key that identifies an annotation kind.

    Args:
        from_name (str): Name of the control tag.
        to_name (str or list): Name of the object tag; a list of names is
            joined with commas.
        type (str): Tag type; it is lower-cased in the result.

    Returns:
        str: The three parts joined with "|", e.g. "chc|text|choices".
    """
    target = ",".join(to_name) if isinstance(to_name, list) else to_name
    return "|".join((from_name, target, type.lower()))
116
+
117
+
118
def get_all_control_tag_tuples(label_config):
    """Return the "from_name|to_name|type" key of every control tag in a config.

    The previous implementation called ``parse_config``, which is neither
    defined nor imported in this module, so every call failed with NameError.
    The config is now parsed with ``LabelInterface``.

    Args:
        label_config (str): Labeling configuration XML.

    Returns:
        list: One string such as "chc|text|choices" per control tag.
    """
    interface = LabelInterface(label_config)
    return [
        get_annotation_tuple(control_name, tag.to_name, tag.tag)
        for control_name, tag in interface._controls.items()
    ]
125
+
126
+
127
def get_all_types(label_config):
    """Get the lower-cased type of every control tag in label_config.

    The previous implementation called ``parse_config``, which is neither
    defined nor imported in this module, so every call failed with NameError.
    The config is now parsed with ``LabelInterface``.

    Args:
        label_config (str): Labeling configuration XML.

    Returns:
        list: Lower-cased tag names, one per control tag.
    """
    interface = LabelInterface(label_config)
    return [tag.tag.lower() for tag in interface._controls.values()]
136
+
137
+
138
def display_count(count: int, type: str) -> Optional[str]:
    """Format a pluralized source of validation errors.

    Produces strings such as "1 draft" or "3 annotations". An "s" is appended
    to ``type`` whenever ``count`` is greater than one.

    Returns:
        The formatted string, or None when ``count`` is zero or otherwise falsy.
    """
    if not count:
        return None

    suffix = "s" if count > 1 else ""
    return f"{count} {type}{suffix}"
146
+
147
+
148
+ ######################
149
+
150
+
151
class LabelInterface:
    """The LabelInterface class serves as an interface to parse and
    validate labeling configurations, annotations, and predictions
    within the Label Studio ecosystem.

    It is designed to be compatible at the data structure level with
    an existing parser widely used within the Label Studio ecosystem.
    This ensures that it works seamlessly with most of the existing functions,
    either by directly supporting them or by offering re-implemented versions
    through the new interface.

    Moreover, the parser adds value by offering functionality to
    validate predictions and annotations against the specified
    labeling configuration. Below is a simple example of how to use
    the new API:

    ```python
    from label_studio_sdk.label_interface import LabelInterface

    config = "<View><Text name='txt' value='$val' /><Choices name='chc' toName='txt'><Choice value='one'/> <Choice value='two'/></Choices></View>"

    li = LabelInterface(config)
    region = li.get_tag("chc").label("one")

    # returns a JSON representing a Label Studio region
    region.as_json()

    # returns True
    li.validate_prediction({
        "model_version": "0.0.1",
        "score": 0.90,
        "result": [{
            "from_name": "chc",
            "to_name": "txt",
            "type": "choices",
            "value": { "choices": ["one"] }
        }]
    })
    ```
    """

    def __init__(self, config: str, *args, **kwargs):
        """Create a LabelInterface instance from the config string.

        Args:
            config (str): Labeling configuration XML.
            *args: Accepted but not used by this constructor.
            **kwargs: Accepted but not used by this constructor.

        Example:
            ```
            label_config = LabelInterface('''
            <View>
              <Choices name="sentiment" toName="txt">
                <Choice value="Positive" />
                <Choice value="Negative" />
                <Choice value="Neutral" />
              </Choices>
              <Text name="txt" value="$text" />
            </View>
            ''')
            ```
        """
        self._config = config

        # extract the predefined task (if any) from a comment in the config
        _task_data, _ann, _pred = LabelInterface.get_task_from_labeling_config(config)
        self._sample_config_task = _task_data
        self._sample_config_ann = _ann
        self._sample_config_pred = _pred

        controls, objects, labels, tree = self.parse(config)
        controls = self._link_controls(controls, objects, labels)

        # names of the control and object tags that this config has
        self._control_tags = set(controls.keys())
        self._object_tags = set(objects.keys())

        self._controls = controls
        self._objects = objects
        self._labels = labels
        self._tree = tree

    ##### NEW API

    @property
    def controls(self):
        """Returns the control tags (a view over the values of `_controls`)."""
        return self._controls and self._controls.values()

    @property
    def objects(self):
        """Returns the object tags (a view over the values of `_objects`)."""
        return self._objects and self._objects.values()

    @property
    def labels(self):
        """Returns the label tags (a view over the values of `_labels`)."""
        return self._labels and self._labels.values()

    def _link_controls(self, controls: Dict, objects: Dict, labels: Dict) -> Dict:
        """Attach object tags and labels to every control tag.

        For each control, the object tags named in its `to_name` that exist
        in `objects` are passed to `set_objects`; names that do not exist are
        skipped silently. The labels registered under the control's name are
        passed to `set_labels` (their values) and `set_labels_attrs` (the
        value -> label tag mapping).

        Returns:
            Dict: The same `controls` dictionary, with its tags updated.
        """
        for name, tag in controls.items():
            inputs = [
                objects[object_tag_name]
                for object_tag_name in tag.to_name
                if object_tag_name in objects
            ]

            tag.set_objects(inputs)
            tag.set_labels(list(labels[name]))
            tag.set_labels_attrs(labels[name])

        return controls

    def _get_tag(self, name, tag_store):
        """Look up a tag in `tag_store`.

        Args:
            name (str or None): Name of the tag. When None, the store is
                expected to hold exactly one tag, which is returned.
            tag_store (dict): Mapping of tag name to tag.

        Raises:
            Exception: If `name` is given but not present in the store, or if
                `name` is None and the store holds more than one tag.
            IndexError: If `name` is None and the store is empty.
        """
        if name is not None:
            if name not in tag_store:
                raise Exception(
                    f"Name {name} is not found, available names: {tag_store.keys()}"
                )
            return tag_store[name]

        if tag_store and len(tag_store.keys()) > 1:
            raise Exception("Multiple object tags connected, you should specify name")

        return list(tag_store.values())[0]

    def get_tag(self, name):
        """Method to retrieve the tag object by its name from the current instance.

        The method checks if the tag with the provided name exists in
        either `_controls` or `_objects` attributes of the current
        instance. If a match is found, it returns the tag. If the tag
        is not found an exception is raised.

        Args:
            name (str): Name of the tag to be retrieved.

        Returns:
            object: The tag object if it exists in either `_controls` or `_objects`.

        Raises:
            Exception: If the tag with the given name does not exist in both `_controls` and `_objects`.

        """
        if name in self._controls:
            return self._controls[name]

        if name in self._objects:
            return self._objects[name]

        raise Exception(f"Tag with name {name} not found")

    def get_object(self, name=None):
        """Retrieves the object with the given name from `_objects`.

        This utilizes the `_get_tag` method to obtain the named object.

        Args:
            name (str, optional): The name of the object to be retrieved from `_objects`.

        Returns:
            object: The corresponding object if it exists in `_objects`.

        """
        return self._get_tag(name, self._objects)

    def get_output(self, name=None):
        """Provides an alias for the `get_control` method."""
        return self.get_control(name)

    def get_control(self, name=None):
        """Retrieves the control tag with the given name from `_controls`.

        This uses the `_get_tag` method to obtain the named control.

        Args:
            name (str, optional): The name of the control to be retrieved.

        Returns:
            object: The corresponding control if it exists in `_controls`.

        """
        return self._get_tag(name, self._controls)

    def find_tags_by_class(self, tag_class) -> List:
        """Finds tags by their class type.

        The function looks into both `self.objects` and
        `self.controls` to find tags that are instances of the
        provided class(es). Every matching tag is returned once, even
        when it is an instance of several of the given classes.

        Args:
            tag_class (class or list/tuple of classes): The class type(s) of the tags to be found.

        Returns:
            list: A list of tags that are instances of the provided `tag_class`(es).

        """
        candidates = list(self.objects) + list(self.controls)
        if isinstance(tag_class, (list, tuple)):
            classes = tuple(tag_class)
        else:
            classes = (tag_class,)

        # a single isinstance() against the tuple avoids returning a tag
        # once per matching class
        return [tag for tag in candidates if isinstance(tag, classes)]

    def find_tags(
        self, tag_type: Optional[str] = None, match_fn: Optional[Callable] = None
    ) -> List:
        """Finds tags that match the given function in the entire parsed tree.

        This function searches through both `objects` and `controls`
        based on `tag_type`, and applies the `match_fn` (if provided)
        to filter matching tags.

        Args:
            tag_type (str, optional): The type of tags to be
              searched. Categories include 'objects', 'controls',
              'inputs' (alias for 'objects'), 'outputs' (alias for
              'controls'). If not specified (or not one of these),
              searches both 'objects' and 'controls'.
            match_fn (Callable, optional): A function that takes a tag
              as an input and returns a boolean value indicating
              whether the tag matches the required condition.

        Returns:
            list: A list of tags that match the given type and satisfy `match_fn`.

        """
        tag_types = {
            "objects": self.objects,
            "controls": self.controls,
            # aliases
            "inputs": self.objects,
            "outputs": self.controls,
        }

        if tag_type in tag_types:
            # always hand out a real list, never the underlying dict view
            found = list(tag_types[tag_type])
        else:
            found = list(self.objects) + list(self.controls)

        if match_fn is not None:
            found = list(filter(match_fn, found))

        return found

    def parse(self, config_string: str) -> Tuple[Dict, Dict, Dict, "etree._Element"]:
        """Parses the received configuration string into dictionaries
        of ControlTags, ObjectTags, and Labels, along with an XML tree
        of the configuration.

        Args:
            config_string (str): the configuration string to be parsed.

        Returns:
            Tuple of:
            - Dictionary where keys are control tag names and values are ControlTag instances.
            - Dictionary where keys are object tag names and values are ObjectTag instances.
            - Dictionary of dictionaries where primary keys are label parent names
              and secondary keys are label values and values are LabelTag instances.
            - An XML tree of the configuration.

        Raises:
            LabelStudioXMLSyntaxErrorSentryIgnored: If the string is not well-formed XML.
        """
        try:
            xml_tree = etree.fromstring(config_string)
        except etree.XMLSyntaxError as e:
            raise LabelStudioXMLSyntaxErrorSentryIgnored(str(e)) from e

        objects, controls, labels = {}, {}, defaultdict(dict)

        for tag in xml_tree.iter():
            if ControlTag.validate_node(tag):
                controls[tag.attrib["name"]] = ControlTag.parse_node(tag)

            elif ObjectTag.validate_node(tag):
                objects[tag.attrib["name"]] = ObjectTag.parse_node(tag)

            elif LabelTag.validate_node(tag):
                lb = LabelTag.parse_node(tag, controls)
                labels[lb.parent_name][lb.value] = lb

        return controls, objects, labels, xml_tree

    @classmethod
    def parse_config_to_json(cls, config_string):
        """Convert the config XML into its BadgerFish JSON representation.

        The result is passed through `_fix_choices` before being returned.

        Raises:
            etree.ParseError: If the input cannot be parsed.
        """
        try:
            xml = etree.fromstring(config_string)
        except TypeError:
            raise etree.ParseError("can only parse strings")
        if xml is None:
            raise etree.ParseError("xml is empty or incorrect")

        config = xmljson.badgerfish.data(xml)
        config = _fix_choices(config)

        return config

    def _schema_validation(self, config_string):
        """Validate the config against the labeling config JSON schema.

        Raises:
            LabelStudioValidationErrorSentryIgnored: If the config cannot be
                parsed or does not satisfy the schema.
        """
        try:
            config = LabelInterface.parse_config_to_json(config_string)
            jsonschema.validate(config, _LABEL_CONFIG_SCHEMA_DATA)
        except (etree.ParseError, ValueError) as exc:
            raise LabelStudioValidationErrorSentryIgnored(str(exc)) from exc
        except jsonschema.exceptions.ValidationError as exc:
            # prefer the message of the last sub-error when there is one
            error_message = exc.context[-1].message if len(exc.context) else exc.message
            error_message = "Validation failed on {}: {}".format(
                "/".join(map(str, exc.path)), error_message.replace("@", "")
            )
            raise LabelStudioValidationErrorSentryIgnored(error_message) from exc

    @staticmethod
    def _attr_values(config_string, attr):
        """Return the values of every `attr="..."` attribute found in the config.

        Both double- and single-quoted values are recognized, and whitespace
        around "=" is allowed. The lookbehind keeps `attr` from matching the
        tail of a longer attribute name.
        """
        pattern = (
            r"(?<![\w:.-])"
            + re.escape(attr)
            + r"""\s*=\s*(?:"([^"]*)"|'([^']*)')"""
        )
        return [
            double_quoted or single_quoted
            for double_quoted, single_quoted in re.findall(pattern, config_string)
        ]

    def _to_name_validation(self, config_string):
        """Check that every toName points to an existing name.

        Raises:
            LabelStudioValidationErrorSentryIgnored: If a toName entry does
                not match the name of any tag.
        """
        names = set(self._attr_values(config_string, "name"))

        for to_names in self._attr_values(config_string, "toName"):
            # a single toName attribute may list several comma-separated names
            for to_name in to_names.split(","):
                if to_name not in names:
                    raise LabelStudioValidationErrorSentryIgnored(
                        f'toName="{to_name}" not found in names: {sorted(names)}'
                    )

    def _unique_names_validation(self, config_string):
        """Check that tag names in the config are unique.

        Raises:
            LabelStudioValidationErrorSentryIgnored: If a name is used more than once.
        """
        all_names = self._attr_values(config_string, "name")
        if len(set(all_names)) != len(all_names):
            raise LabelStudioValidationErrorSentryIgnored(
                "Label config contains non-unique names"
            )

    def load_task(self, task):
        """Loads a task and substitutes the value in each object tag
        with actual data from the task, returning a copy of the
        LabelInterface object.

        If the `value` field in an object tag is designed to take
        variable input (i.e., `value_is_variable` is True), the
        function replaces this value with the corresponding value from
        the task dictionary.

        Args:
            task (dict): Dictionary representing the task, where
              each key-value pair denotes an attribute-value of the
              task.

        Returns:
            LabelInterface: A deep copy of the current LabelInterface
            instance with the object tags' value fields populated with
            data from the task.

        """
        tree = copy.deepcopy(self)
        for obj in tree.objects:
            if obj.value_is_variable and obj.value_name in task:
                obj.value = task.get(obj.value_name)

        return tree

    @property
    def is_valid(self):
        """True when `validate()` passes, False when it raises a validation error."""
        try:
            self.validate()
            return True
        except LabelStudioValidationErrorSentryIgnored:
            return False

    def validate(self):
        """Validates the provided configuration string against various validation criteria.

        This method applies a series of validation checks to
        `_config`, including schema validation, checking for
        uniqueness of names used in the configuration, and the
        "to_name" validation. It throws exceptions if any of these
        validations fail.

        Raises:
            Exception: If any validation fails, specific to the type of validation.

        """
        config_string = self._config

        self._schema_validation(config_string)
        self._unique_names_validation(config_string)
        self._to_name_validation(config_string)

    @classmethod
    def validate_with_data(cls, config):
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError()

    def validate_task(self, task: "TaskValue", validate_regions_only=False):
        """Validate a task, with its annotations and predictions, against the config.

        Args:
            task: Task with a "data" mapping and optional "annotations" and
                "predictions" lists.
            validate_regions_only: Currently not used.

        Returns:
            bool: False if the task data misses a value for an object tag
            whose value is a variable, or if any annotation or prediction
            fails validation; True otherwise.
        """
        # TODO this might not be always true, and we need to use
        # "strict" param above to be able to configure

        # for every object tag we've got that has value as its
        # variable we need to have an associated item in the task data
        for obj in self.objects:
            if obj.value_is_variable and task["data"].get(obj.value_name, None) is None:
                return False

        # every annotation / prediction attached to the task has to be valid
        if "annotations" in task and not all(
            self.validate_annotation(annotation)
            for annotation in task["annotations"] or []
        ):
            return False

        if "predictions" in task and not all(
            self.validate_prediction(prediction)
            for prediction in task["predictions"] or []
        ):
            return False

        return True

    def validate_annotation(self, annotation):
        """Validates the given annotation against the current configuration.

        This method applies the `validate_region` method to each
        region in the annotation and returns False if any of these
        validations fail. If all the regions pass the validation, it
        returns True.

        Args:
            annotation (dict): The annotation to be validated, where
              each key-value pair denotes an attribute-value of the
              annotation.

        Returns:
            bool: True if all regions in the annotation pass the
            validation, False otherwise.

        """
        return all(self.validate_region(r) for r in annotation.get(RESULT_KEY))

    def validate_prediction(self, prediction):
        """Same as validate_annotation right now"""
        return all(self.validate_region(r) for r in prediction.get(RESULT_KEY))

    def validate_region(self, region) -> bool:
        """Validates a region from the annotation against the current
        configuration.

        The validation checks the following:
            - Both control and object items are present in the labeling configuration.
            - The type of the region matches the control tag name.
            - The 'to_name' in the region data connects to the same tag as in the configuration.
            - The actual value for example in <Labels /> tag is producing start, end, and labels.

        If any of these validations fail, the function immediately
        returns False. If all validations pass for a region, it
        returns True.

        Args:
            region (dict): The region to be validated.

        Returns:
            bool: True if all checks pass for the region, False otherwise.

        """
        # plain dictionary lookups: an unknown name has to yield False here,
        # while get_control() / get_object() would raise instead
        control = self._controls.get(region["from_name"])
        obj = self._objects.get(region["to_name"])

        # we should have both items present in the labeling config
        if not control or not obj:
            return False

        # type of the region should match the tag name
        if control.tag.lower() != region["type"]:
            return False

        # make sure that in config it connects to the same tag as
        # implied by the region data
        if region["to_name"] not in control.to_name:
            return False

        # validate the actual value, for example that <Labels /> tag
        # is producing start, end, and labels
        if not control.validate_value(region["value"]):
            return False

        return True

    ### Generation

    def _sample_task(self, secure_mode=False):
        """Build a sample task for the editor preview.

        The generated example values are overridden by the task data
        predefined in the config comment, when there is any.

        Returns:
            tuple: (task data, predefined annotations, predefined predictions).
        """
        generated_task = self.generate_sample_task(
            mode="editor_preview", secure_mode=secure_mode
        )

        if self._sample_config_task is not None:
            generated_task.update(self._sample_config_task)

        return generated_task, self._sample_config_ann, self._sample_config_pred

    def generate_sample_task(self, mode="upload", secure_mode=False):
        """Generates a sample task based on the provided mode and
        secure_mode.

        This function generates an example value for each object in
        `self.objects` using the specified `mode` and
        `secure_mode`. The resulting task is a dictionary where each
        key-value pair denotes an object's value-name and example
        value.

        Args:
            mode (str, optional): The operation mode. Accepts any string but defaults to 'upload'.
            secure_mode (bool, optional): The security mode. Defaults to False.

        Returns:
            dict: A dictionary representing the sample task.

        """
        task = {
            obj.value_name: obj.generate_example_value(
                mode=mode, secure_mode=secure_mode
            )
            for obj in self.objects
        }

        return task

    def generate_sample_annotation(self):
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError()

    #####
    ##### COMPATIBILITY LAYER
    #####
    ##### These are re-implementations of functions found in different
    ##### label_config.py files across the repo. Not all of them were
    ##### tested, therefore I suggest to write a test first, and then
    ##### replace where it's being used in the repo.

    def config_essential_data_has_changed(self, new_config_str):
        """Detect essential changes of the labeling config.

        Returns:
            bool: True if `new_config_str` introduces a control tag that the
            current config does not have, or changes the tag type or
            connected objects of a control, or drops one of its labels;
            False otherwise.
        """
        new_obj = LabelInterface(config=new_config_str)

        for new_tag_name, new_tag in new_obj._controls.items():
            if new_tag_name not in self._controls:
                return True

            old_tag = self._controls[new_tag_name]

            if new_tag.tag != old_tag.tag:
                return True
            if new_tag.objects != old_tag.objects:
                return True
            if not set(old_tag.labels).issubset(new_tag.labels):
                return True

        return False

    def generate_sample_task_without_check(
        label_config, mode="upload", secure_mode=False
    ):
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError()

    @classmethod
    def get_task_from_labeling_config(cls, config):
        """Get task, annotations and predictions from labeling config comment,
        it must start from "<!-- {" and end as "} -->"

        Args:
            config (str): Labeling configuration XML.

        Returns:
            tuple: (task_data, annotations, predictions). Without a parsable
            comment this is ({}, None, None). Otherwise task_data is the
            "data" entry of the comment body if present; else the whole body,
            unless the body carries "annotations" or "predictions", in which
            case task_data is None.
        """
        task_data, annotations, predictions = {}, None, None

        start = config.find("<!-- {")
        if start < 0:
            start = config.find("<!--{")
        if start < 0:
            # no comment with embedded JSON in this config
            return task_data, annotations, predictions

        start += 4  # skip the "<!--" marker itself
        end = config.find("-->", start)
        if end < 0:
            return task_data, annotations, predictions

        try:
            body = json.loads(config[start:end])
        except (ValueError, RecursionError):
            # best effort: a comment that is not valid JSON is not a task
            return task_data, annotations, predictions

        dont_use_root = "predictions" in body or "annotations" in body
        if "data" in body:
            task_data = body["data"]
        else:
            task_data = None if dont_use_root else body
        predictions = body.get("predictions")
        annotations = body.get("annotations")

        return task_data, annotations, predictions

    @classmethod
    def config_line_stipped(cls, c):
        """Return the config as a single line, with XML comments removed.

        When the config contains comments, they are removed from the parsed
        tree and the tree is serialized again; a config without comments is
        returned as given. In both cases line breaks are dropped.

        Args:
            c (str): Labeling configuration XML.
        """
        # NOTE: `forbid_dtd` is a defusedxml keyword that is not part of
        # the documented lxml `etree.fromstring` signature, hence it is
        # not passed here.
        tree = etree.fromstring(c)
        comments = tree.xpath("//comment()")

        for comment in comments:
            parent = comment.getparent()
            if parent is not None:
                parent.remove(comment)

        stripped = c
        if comments:
            stripped = etree.tostring(tree, method="html").decode("utf-8")

        return stripped.replace("\n", "").replace("\r", "")

    def get_all_control_tag_tuples(self):
        """Return `as_tuple()` of every control tag of this config."""
        return [tag.as_tuple() for tag in self.controls]

    def get_first_tag_occurence(
        self,
        control_type: Union[str, Tuple],
        object_type: Union[str, Tuple],
        name_filter: Optional[Callable] = None,
        to_name_filter: Optional[Callable] = None,
    ) -> Tuple[str, str, str]:
        """
        Reads config and fetches the first control tag along with first object tag that matches the type.

        Args:
          control_type (str or tuple): The control type for checking tag matches.
          object_type (str or tuple): The object type for checking tag matches.
          name_filter (function, optional): If given, it is passed to the
                                            control tag's `match` as `name_filter_fn`.
                                            Default is None.
          to_name_filter (function, optional): If given, it is passed to the
                                               object tag's `match` as `to_name_filter_fn`.
                                               Default is None.

        Returns:
          tuple: (from_name, to_name, value), representing control tag, object tag and input value.

        Raises:
          ValueError: If no matching pair of control and object tag is found.
        """

        for tag in self.controls:
            if tag.match(control_type, name_filter_fn=name_filter):
                for object_tag in tag.objects:
                    if object_tag.match(object_type, to_name_filter_fn=to_name_filter):
                        return tag.name, object_tag.name, object_tag.value_name

        raise ValueError(
            f"No control tag of type {control_type} and object tag of type {object_type} found in label config"
        )

    def get_all_labels(self):
        """Return the labels and the names of controls that have a dynamic value.

        Returns:
            tuple: (`_labels`, {control name: True} for every control whose
            `dynamic_value` is truthy).
        """
        dynamic_values = {c.name: True for c in self.controls if c.dynamic_value}
        return self._labels, dynamic_values

    def get_all_object_tag_names(self):
        """Return the names of all object tags (the keys of `_objects`)."""
        return self._objects.keys()

    def extract_data_types(self):
        """Return the mapping of object tag name to object tag."""
        return self._objects

    def is_video_object_tracking(self):
        """Tell whether the config contains a video tracking tag."""
        tags = self.find_tags(
            match_fn=lambda tag: tag.tag.lower() in _VIDEO_TRACKING_TAGS
        )

        return bool(tags)

    def is_type(self, tag_type=None):
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    # NOTE: validate() replaces the former validate_label_config()

    def validate_config_using_summary(self, summary, strict=False):
        """Validate current config using LS Project Summary

        This is a rewrite of the project.validate_config function.

        Args:
            summary: Object exposing `created_labels`, `created_labels_drafts`
                and `created_annotations`.
            strict: Currently not used.

        Returns:
            bool: False if the config has no object tags, True when all
            consistency checks pass.

        Raises:
            LabelStudioValidationErrorSentryIgnored: If the existing
                annotations or labels are incompatible with the config.
        """
        if not self._objects:
            return False

        created_labels = summary.created_labels
        created_labels_drafts = summary.created_labels_drafts
        annotations_summary = summary.created_annotations

        self.validate_annotations_consistency(annotations_summary)
        self.validate_lables_consistency(created_labels, created_labels_drafts)

        return True

    def validate_lables_consistency(self, created_labels, created_labels_drafts):
        """Check that labels already used in annotations or drafts still exist.

        Args:
            created_labels (dict): {control name: {label: count}} for annotations.
            created_labels_drafts (dict): Same structure, for drafts.

        Raises:
            LabelStudioValidationErrorSentryIgnored: If a control tag that has
                labels in the data was removed from the config, or if a label
                with a positive count is missing from its control tag.
        """
        created_labels = merge_labels_counters(created_labels, created_labels_drafts)

        # <Labels name="sentiment" ...><Label value="Negative" ... />
        # {'sentiment': {'Negative': 1, 'Positive': 3, 'Neutral': 1}}

        for control_tag_from_data, labels_from_data in created_labels.items():
            # Check if labels were created in annotations while their control
            # tag has been removed. A plain lookup is required here because
            # get_control() raises for unknown names, which made the check
            # below unreachable.
            control_from_config = self._controls.get(control_tag_from_data)

            if labels_from_data and not control_from_config:
                raise LabelStudioValidationErrorSentryIgnored(
                    f"There are {sum(labels_from_data.values(), 0)} annotation(s) created with tag "
                    f'"{control_tag_from_data}", you can\'t remove it'
                )

            # Labels that were used in labeling before, but are not present
            # in the current config
            removed_labels = [
                label_name
                for label_name, label_value in labels_from_data.items()
                if label_value > 0
                and not control_from_config.labels_attrs.get(label_name, None)
            ]

            # TODO special handling of VideoRectangle and Taxonomy tags
            # needs to be added back

            if len(removed_labels):
                raise LabelStudioValidationErrorSentryIgnored(
                    f'These labels still exist in annotations or drafts:\n{",".join(removed_labels)}'
                    f'Please add labels to tag with name="{str(control_tag_from_data)}".'
                )

    def validate_annotations_consistency(self, annotations_summary):
        """Check that existing annotations are compatible with the config.

        Args:
            annotations_summary: Comes from the LS Project Summary, its
                format is { "chc|text|choices": 10 }, i.e. a
                "from_name|to_name|type" key mapped to the number of
                annotations of that kind.

        Raises:
            LabelStudioValidationErrorSentryIgnored: If the control tag of an
                entry, or its connection to the object tag, cannot be found.
        """
        err = []
        annotations_from_data = set(annotations_summary)

        for ann in annotations_from_data:
            from_name, to_name, tag_type = ann.split("|")

            # avoid textarea to_name check (see DEV-1598)
            if tag_type.lower() == "textarea":
                continue

            try:
                control = self.get_control(from_name)
                if not control or not control.get_object(to_name):
                    err.append(
                        f"with from_name={from_name}, to_name={to_name}, type={tag_type}"
                    )
            except Exception as ex:
                err.append(
                    f"Error occurred while processing from_name={from_name}, to_name={to_name}, type={tag_type}, error: {str(ex)}"
                )

        if err:
            diff_str = "\n".join(err)
            raise LabelStudioValidationErrorSentryIgnored(
                f"Created annotations are incompatible with provided labeling schema, we found:\n{diff_str}"
            )