pixeltable 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (77)
  1. pixeltable/__init__.py +15 -33
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/catalog.py +1 -1
  4. pixeltable/catalog/column.py +29 -11
  5. pixeltable/catalog/dir.py +2 -2
  6. pixeltable/catalog/insertable_table.py +5 -55
  7. pixeltable/catalog/named_function.py +2 -2
  8. pixeltable/catalog/schema_object.py +2 -7
  9. pixeltable/catalog/table.py +307 -186
  10. pixeltable/catalog/table_version.py +109 -63
  11. pixeltable/catalog/table_version_path.py +28 -5
  12. pixeltable/catalog/view.py +20 -10
  13. pixeltable/dataframe.py +128 -25
  14. pixeltable/env.py +29 -18
  15. pixeltable/exec/exec_context.py +5 -0
  16. pixeltable/exec/exec_node.py +1 -0
  17. pixeltable/exec/in_memory_data_node.py +29 -24
  18. pixeltable/exec/sql_scan_node.py +1 -1
  19. pixeltable/exprs/column_ref.py +13 -8
  20. pixeltable/exprs/data_row.py +4 -0
  21. pixeltable/exprs/expr.py +16 -1
  22. pixeltable/exprs/function_call.py +4 -4
  23. pixeltable/exprs/row_builder.py +29 -20
  24. pixeltable/exprs/similarity_expr.py +4 -3
  25. pixeltable/ext/functions/yolox.py +2 -1
  26. pixeltable/func/__init__.py +1 -0
  27. pixeltable/func/aggregate_function.py +14 -12
  28. pixeltable/func/callable_function.py +8 -6
  29. pixeltable/func/expr_template_function.py +13 -19
  30. pixeltable/func/function.py +3 -6
  31. pixeltable/func/query_template_function.py +84 -0
  32. pixeltable/func/signature.py +68 -23
  33. pixeltable/func/udf.py +13 -10
  34. pixeltable/functions/__init__.py +6 -91
  35. pixeltable/functions/eval.py +26 -14
  36. pixeltable/functions/fireworks.py +25 -23
  37. pixeltable/functions/globals.py +62 -0
  38. pixeltable/functions/huggingface.py +20 -16
  39. pixeltable/functions/image.py +170 -1
  40. pixeltable/functions/openai.py +95 -128
  41. pixeltable/functions/string.py +10 -2
  42. pixeltable/functions/together.py +95 -84
  43. pixeltable/functions/util.py +16 -0
  44. pixeltable/functions/video.py +94 -16
  45. pixeltable/functions/whisper.py +78 -0
  46. pixeltable/globals.py +1 -1
  47. pixeltable/io/__init__.py +10 -0
  48. pixeltable/io/external_store.py +370 -0
  49. pixeltable/io/globals.py +51 -22
  50. pixeltable/io/label_studio.py +639 -0
  51. pixeltable/io/parquet.py +1 -1
  52. pixeltable/iterators/__init__.py +9 -0
  53. pixeltable/iterators/string.py +40 -0
  54. pixeltable/metadata/__init__.py +6 -8
  55. pixeltable/metadata/converters/convert_10.py +2 -4
  56. pixeltable/metadata/converters/convert_12.py +7 -2
  57. pixeltable/metadata/converters/convert_13.py +6 -8
  58. pixeltable/metadata/converters/convert_14.py +2 -4
  59. pixeltable/metadata/converters/convert_15.py +44 -0
  60. pixeltable/metadata/converters/convert_16.py +18 -0
  61. pixeltable/metadata/converters/util.py +66 -0
  62. pixeltable/metadata/schema.py +3 -3
  63. pixeltable/plan.py +8 -7
  64. pixeltable/store.py +1 -1
  65. pixeltable/tool/create_test_db_dump.py +147 -54
  66. pixeltable/tool/embed_udf.py +9 -0
  67. pixeltable/type_system.py +1 -2
  68. pixeltable/utils/code.py +34 -0
  69. {pixeltable-0.2.8.dist-info → pixeltable-0.2.9.dist-info}/METADATA +1 -1
  70. pixeltable-0.2.9.dist-info/RECORD +131 -0
  71. pixeltable/datatransfer/__init__.py +0 -1
  72. pixeltable/datatransfer/label_studio.py +0 -452
  73. pixeltable/datatransfer/remote.py +0 -85
  74. pixeltable/functions/pil/image.py +0 -147
  75. pixeltable-0.2.8.dist-info/RECORD +0 -124
  76. {pixeltable-0.2.8.dist-info → pixeltable-0.2.9.dist-info}/LICENSE +0 -0
  77. {pixeltable-0.2.8.dist-info → pixeltable-0.2.9.dist-info}/WHEEL +0 -0
@@ -0,0 +1,34 @@
1
+ import types
2
+ from typing import Optional
3
+
4
+ from pixeltable.func import Function
5
+
6
+
7
+ # Utilities related to the organization of the Pixeltable codebase.
8
+
9
def local_public_names(mod_name: str, exclude: Optional[list[str]] = None) -> list[str]:
    """
    Collect the names a module should advertise, e.g. from its `__dir__()` for tab-completion.

    The module is imported by name and every value in its namespace is inspected:

    * Pixeltable `Function` instances contribute their `name`;
    * plain Python functions contribute their `__name__` when they were defined in this
      module and the name does not start with an underscore;
    * modules contribute their last path component when they are direct children of
      `mod_name`.

    Args:
        mod_name: Fully qualified name of the module to inspect.
        exclude: Names to leave out of the result; `None` means exclude nothing.

    Returns:
        The collected names, in namespace order, without the excluded ones.
    """
    import importlib

    module = importlib.import_module(mod_name)
    excluded = [] if exclude is None else exclude

    found = []
    for member in vars(module).values():
        if isinstance(member, Function):
            # Pixeltable function
            found.append(member.name)
            continue
        if isinstance(member, types.FunctionType):
            # Python function: keep it only if it is local and public
            is_local = member.__module__ == module.__name__
            if is_local and not member.__name__.startswith('_'):
                found.append(member.__name__)
            continue
        if isinstance(member, types.ModuleType):
            # Module: keep it only if its parent package is `mod_name`
            parent, _, leaf = member.__name__.rpartition('.')
            if parent == mod_name:
                found.append(leaf)

    return [name for name in found if name not in excluded]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.8
3
+ Version: 0.2.9
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Marcel Kornacker
6
6
  Author-email: marcelk@gmail.com
@@ -0,0 +1,131 @@
1
+ pixeltable/__init__.py,sha256=GNlV3jQKDQKTrOq8BVvPsnd9W9ROneP9-1C-zgNSWsg,1275
2
+ pixeltable/__version__.py,sha256=ZH1jMRnmzFfm1T-NAz5ZJ08lYojBz6jzzdMQmFBGDzw,112
3
+ pixeltable/catalog/__init__.py,sha256=E41bxaPeQIcgRYzTWc2vkDOboQhRymrJf4IcHQO7o_8,453
4
+ pixeltable/catalog/catalog.py,sha256=gzEuse0f_rj7oKRWMyorhUpOLhYbWSQ8CFLOuFLbXak,7918
5
+ pixeltable/catalog/column.py,sha256=Dmc6CgFLExJy3tdvuX0Emjc8SgqZvmCbAHozibO1-G0,9417
6
+ pixeltable/catalog/dir.py,sha256=DWl9nnCOoiYLKWp31MNMvLmryXeQiQZu5YJcd4tpy38,921
7
+ pixeltable/catalog/globals.py,sha256=yLEGNbsSnLzjWNHVJacfjA9hbw13Q6QXLOSCRmdTlq0,943
8
+ pixeltable/catalog/insertable_table.py,sha256=jP6EFMqkGNAbqRAj6YpCaZzWdFwu13t_dQste_f0clk,6468
9
+ pixeltable/catalog/named_function.py,sha256=UhHaimM_uJHS-0RQcqGOgvWeZtMfKsIgSeKSRwT2moU,1149
10
+ pixeltable/catalog/path.py,sha256=QgccEi_QOfaKt8YsR2zLtd_z7z7QQkU_1kprJFi2SPQ,1677
11
+ pixeltable/catalog/path_dict.py,sha256=xfvxg1Ze5jZCARUGASF2DRbQPh7pRVTYhuJ_u82gYUo,5941
12
+ pixeltable/catalog/schema_object.py,sha256=Sf8aGARpsPeRUz1NjsZ97oSBMyElsctP39uU9x9Ic80,964
13
+ pixeltable/catalog/table.py,sha256=qzxm3SN0hHObZlJycEsmzOd3VLsqSnrZ9fk1M3F5YGM,42636
14
+ pixeltable/catalog/table_version.py,sha256=-46Z-aSKdjXpbmWlwZpGWZMw6VOKJZczNvx05Txe_0I,55148
15
+ pixeltable/catalog/table_version_path.py,sha256=lE2EFn0kylXhy-oBXTyXbJox6rl_6klfOmDjK-2Uq_Q,6432
16
+ pixeltable/catalog/view.py,sha256=dphf9ZFi_ReQkokcN_Auh6ZQFmGtnOcVDvz6fcJfJzs,10332
17
+ pixeltable/dataframe.py,sha256=cQb70E0WVWsTQ_rA6CNx0dfCkGQoqgdoERa6HgpdPDY,37167
18
+ pixeltable/env.py,sha256=WO_WLfRj9Fft6QyW89S9cw47RTg1ALviStu9pNygJEQ,21635
19
+ pixeltable/exceptions.py,sha256=MSP9zeL0AmXT93XqjdvgGN4rzno1_KRrGriq6hpemnw,376
20
+ pixeltable/exec/__init__.py,sha256=RK7SKvrQ7Ky3G_LXDP4Bf7lHmMM_uYZl8dJaZYs0FjY,454
21
+ pixeltable/exec/aggregation_node.py,sha256=cf6rVAgrGh_uaMrCIgXJIwQTmbcboJlnrH_MmPIQSd0,3321
22
+ pixeltable/exec/cache_prefetch_node.py,sha256=d5pEuR6AtJQkEVy9X3XeYFI_q0szMtoNAH96vYdtBE0,5241
23
+ pixeltable/exec/component_iteration_node.py,sha256=Uz6zEeaJMcbvF3S0W0qmLI_uWsZsaSspHKNzuAMrasg,4069
24
+ pixeltable/exec/data_row_batch.py,sha256=1IDYHBkSQ60dwOnAGnS-Wpp3AsnbMqKcY40zUT7ku-Q,3392
25
+ pixeltable/exec/exec_context.py,sha256=0rg5V8HzSy-BvqmSbGr-U4aJ4eOZg2JN0x6zjYQGtBc,1090
26
+ pixeltable/exec/exec_node.py,sha256=ixkv3p_EfF53UDWgwLjQGKR1LNIQxzgDXsTzzJj6ea4,2211
27
+ pixeltable/exec/expr_eval_node.py,sha256=fEzbeZ0J-kylRQ2M0nSlUeLFRTHlwNzlvBo1yqWQ2rg,10856
28
+ pixeltable/exec/in_memory_data_node.py,sha256=vvjr5-r94mRddOYIpKF8zDWCJPJrG0YpQVtYSyKlyVU,3502
29
+ pixeltable/exec/media_validation_node.py,sha256=OKfRyKpcn7AZdACy_HD4NsDC87ZfNFs1tdrQz2NiIVw,1514
30
+ pixeltable/exec/sql_scan_node.py,sha256=CGUoBKnhuDDkLepqCmS-1vjSvG8aiIhvAZHdZZFTRSM,10295
31
+ pixeltable/exprs/__init__.py,sha256=7dwrdk-NpF66OT-m5yNtFEhq-o1T476dnXHjluw2K1s,951
32
+ pixeltable/exprs/arithmetic_expr.py,sha256=sWBYCBKI6IHj9ASwDcm2BlkQ5gleVtKtmpiPvzFNBJM,4386
33
+ pixeltable/exprs/array_slice.py,sha256=VmWc6iFusrM85MjyEBBCfXG1Jnt8-Gr6-J88BXxNoOE,2131
34
+ pixeltable/exprs/column_property_ref.py,sha256=0PHiBys0fxe2LgjaMId5UHob4E-ZggyPLnnW41RgA0E,2706
35
+ pixeltable/exprs/column_ref.py,sha256=jmKUHuRev3PI-58BqYQVcBPLQ3E7bM1mPlFSVlwa5b0,5554
36
+ pixeltable/exprs/comparison.py,sha256=hP3M_lMWcFgENBICFosZPw2lRm1R6_qM_O9bKPmWJGI,4789
37
+ pixeltable/exprs/compound_predicate.py,sha256=Gh22MKi625m5A_RunVRd-a1XFi-fitikqBVz2VNXKrs,3830
38
+ pixeltable/exprs/data_row.py,sha256=RTBw1cBt29g_9g_hgdEYZ5aiHl7WZMBaBC2fOMOfwOc,8668
39
+ pixeltable/exprs/expr.py,sha256=mxpBHOiKbHRX9FXLZWd5UjhPu2NjuXsO6v1RRjuH4GY,24813
40
+ pixeltable/exprs/expr_set.py,sha256=Q64Q2yI0CTq2Ma_E-BUYlMotSstVuMm4OFZnBCedHRk,1222
41
+ pixeltable/exprs/function_call.py,sha256=UKqErJyPUa6-Q-SqkVsK_tT4ti--z_YvQEmj1jHGc8w,17149
42
+ pixeltable/exprs/globals.py,sha256=KhK4xwkLHv4NsXXcLdjRu2OFSvEnlC7GG-8Gs_IbQtI,1858
43
+ pixeltable/exprs/image_member_access.py,sha256=KSYdTIaLh53dNRjv3SJFchPMPo7o5diJSQkV1NsyB4Y,3547
44
+ pixeltable/exprs/in_predicate.py,sha256=burxrBCH1MXqU-wrNWJvD0PRGzJdWy85intOSftQK54,3696
45
+ pixeltable/exprs/inline_array.py,sha256=293WuUEhYXrcp8-AnPDVIWQBPQMrPviB88A619Ls_Es,4499
46
+ pixeltable/exprs/inline_dict.py,sha256=TWYokJ14Nq-evODcYFVO471WSEDbz6cJqIdRb2PkbZQ,3885
47
+ pixeltable/exprs/is_null.py,sha256=nvpOXtQj1UeYJpkCWzbaGuQElzrA2HSG3XNQugOv-pw,1041
48
+ pixeltable/exprs/json_mapper.py,sha256=I60VNgus64ai80gnFCIsRn0VRWYXMkqH5VNvnATsN9s,4559
49
+ pixeltable/exprs/json_path.py,sha256=Wz_5zFsyc9TPhsSbsDjDmQ3Nb0uVIwMCx5nh-cQYBiE,6526
50
+ pixeltable/exprs/literal.py,sha256=5NNza-WL1dd3hNznwwkr_yAcTGXSIRYUszGfy30lruI,2396
51
+ pixeltable/exprs/object_ref.py,sha256=eTcx84aWRI59fIiGvbdv3_cfL0XW4xEFQ4lwpLpJkM8,1250
52
+ pixeltable/exprs/predicate.py,sha256=OSDgjfSqiK7J_5GZMUXMvjfyomKEGi0JNxeB073SGXw,1859
53
+ pixeltable/exprs/row_builder.py,sha256=0OYd51J2ECPHkk2iN3MfYpS7LqnHTV5l5ubsVcy0dJA,15926
54
+ pixeltable/exprs/rowid_ref.py,sha256=74w4rEy21YysTVbyKNc3op-pYFqDAx8VJdtl7ZPpxHs,4268
55
+ pixeltable/exprs/similarity_expr.py,sha256=i5urJiSD43lJIi0AnMZhNvl7q5I8P3BPRuMx4IaAym0,3015
56
+ pixeltable/exprs/type_cast.py,sha256=JMg8p1qYoFfiAXfJPSbTEnfrK7lRO_JMaqlPHOrhNQU,1793
57
+ pixeltable/exprs/variable.py,sha256=Kg_O4ytcHYZFijIyMHYBJn063cTKU1-YE583FAz8Qaw,1361
58
+ pixeltable/ext/__init__.py,sha256=0uugfuME1FybVo-MdxaVNGagRjhcvNTnv5MZUem6Cyo,269
59
+ pixeltable/ext/functions/whisperx.py,sha256=CnpSPZJgufXa01vgUubVkyxQuZIdublJzkwbm5kS1YQ,1078
60
+ pixeltable/ext/functions/yolox.py,sha256=8e-S9SD0xpwkMlSHnpm6aZc7lToHZnEnbZvM_M1alwU,3688
61
+ pixeltable/func/__init__.py,sha256=WjftUGyNkd6bF_qSxqZ5Gd7Elf8oExb3dUlpydhdFTo,407
62
+ pixeltable/func/aggregate_function.py,sha256=PCY-pW1cJZPx3rsHzcBDddmDoxV7IEt5n3afyeAhZ8M,9329
63
+ pixeltable/func/callable_function.py,sha256=eD-h1KlihM4SDbCKr2HJtP8XTxAV7L0bxPCdyt-71JY,4598
64
+ pixeltable/func/expr_template_function.py,sha256=_5xkhODJRiObOAPErvVhlpLaBpXTcjhovibCeB_1AB0,4058
65
+ pixeltable/func/function.py,sha256=cVmu3scwveMc2hgcvhAkAEuqvahL1sKD-txz4bs21pc,5539
66
+ pixeltable/func/function_registry.py,sha256=1ibSQxEPm3Zd3r497vSlckQiDG9sfCnyJx3zcSm9t7c,11456
67
+ pixeltable/func/globals.py,sha256=sEwn6lGgHMp6VQORb_P5qRd_-Q2_bUSqvqM9-XPN_ec,1483
68
+ pixeltable/func/query_template_function.py,sha256=BUU0KZYkqVPjIFg6g6msU4PzVGf2fkEKveDaEMGXhzI,3680
69
+ pixeltable/func/signature.py,sha256=6Lwf32auSnmhYGrN4NkHk07BmG2a73TNICbWnelkH1s,8638
70
+ pixeltable/func/udf.py,sha256=TtFgA9gXUGasd3yj22S6NAsanPJhmmZI_ewbcrTB_VY,6798
71
+ pixeltable/functions/__init__.py,sha256=mC_0y7aLAqamgOj94S5e9yQ4HbnqPiXy-1_t2s6SPqY,290
72
+ pixeltable/functions/eval.py,sha256=FMn95e0GG-ge7ss6dkDOV-7KQK9mSPvn-UyFB2rgBFI,8492
73
+ pixeltable/functions/fireworks.py,sha256=sbZy3HRn8o01nT4k1lOJJ_jGrjhBNkYmj1_TraoYCiM,1090
74
+ pixeltable/functions/globals.py,sha256=MVKcwvfRaqBMV58KBxp5ACMsTbj29WD97AOg1N5ZheY,1596
75
+ pixeltable/functions/huggingface.py,sha256=qFdFpBX4R8Iz6fB0PKOwicdfvnEV7DQhtdcvsIQz55I,7372
76
+ pixeltable/functions/image.py,sha256=lC5PVvx0hXlINGcURLrLeTT7xUMXla6QUC1T9t4-A0Y,5440
77
+ pixeltable/functions/openai.py,sha256=Q6imhdlyZRuWNgBq9msqXMT8up_0e9jSsZPfSJ5DgUY,7839
78
+ pixeltable/functions/string.py,sha256=Ae_weygd9Aj98buLC4tPLRYGg3LGSJEpXaqr93TF4nw,645
79
+ pixeltable/functions/together.py,sha256=2vHOoXMUIpeYwTYGTr3hDHePzy8zepvoeyORgV_9n34,4412
80
+ pixeltable/functions/util.py,sha256=F2iiIL7UfhYdCVzdCa3efYqWbaeLKFrbycKnuPkG57M,650
81
+ pixeltable/functions/video.py,sha256=yn52MimAVrSzUyAUtyxbd1RWveX_TyjwsomBuhK9V60,6516
82
+ pixeltable/functions/whisper.py,sha256=0lsfaGPGmJCFh2Jh-RtzwLhjo9BA83X3l7ot15fNEiA,2406
83
+ pixeltable/globals.py,sha256=WeIQbN6peyJL3VbIBxtL21p7XKREpDmtZ_MuDEUmOu4,14158
84
+ pixeltable/index/__init__.py,sha256=XBwetNQQwnz0fiKwonOKhyy_U32l_cjt77kNvEIdjWs,102
85
+ pixeltable/index/base.py,sha256=YAQ5Dz1mfI0dfu9rxWHWroE8TjB90yKfPtXAzoADq38,1568
86
+ pixeltable/index/btree.py,sha256=NE4GYhcJWYJhdKyeHI0sQBlFvUaIgGOF9KLyCZOfFjE,1822
87
+ pixeltable/index/embedding_index.py,sha256=AYphEggN-0B4GNrm4nMmi46CEtrQw5tguyk67BK2sWo,7627
88
+ pixeltable/io/__init__.py,sha256=DdqOteR1Y-yRvFS0VojXHryBtIGzH8nAN-1MBj3LGRk,493
89
+ pixeltable/io/external_store.py,sha256=ZtDkfHMBo73JMKJ-q3J5FMWztYog38APn8kg81nCg34,16434
90
+ pixeltable/io/globals.py,sha256=lfaXosEu1e2xkWhwUCUJNhpnZxXlBdf6jPlf8cf4F50,4546
91
+ pixeltable/io/hf_datasets.py,sha256=h5M1NkXOvEU8kaeT3AON1A18Vmhnc1lVo5a3TZ5AAic,8004
92
+ pixeltable/io/label_studio.py,sha256=ZWXTuIFk4CheM2ON0VDyb_34L9BJUY7iUJLcPvrll7c,28782
93
+ pixeltable/io/pandas.py,sha256=cDHUDW2CGiBbsEJB9zE5vkXopTKxDdI-CZxNcp0OnIk,6478
94
+ pixeltable/io/parquet.py,sha256=i4hvYHsARe2GnZHxNmI66Vf3tr1sIFLN6KGCJYvH3o8,8149
95
+ pixeltable/iterators/__init__.py,sha256=sjldFckkT8aVRiKgEP6faeAK2NQBdzbmpwAeRhI1FkM,366
96
+ pixeltable/iterators/base.py,sha256=cnEh1tNN2JAxRzrLTg3dhun3N1oNQ8vifCm6ts3_UiE,1687
97
+ pixeltable/iterators/document.py,sha256=netSCJatG8NcgbHZ69BvQVICdAorQlYi8OlcpqwLQD4,19436
98
+ pixeltable/iterators/string.py,sha256=NG_fWc_GAITDfzl6MvrDOMrSoMcZdMZf6hPQztCSatE,1305
99
+ pixeltable/iterators/video.py,sha256=xtxODL1AfZwTfHVzWekhTCLA8gwTJIvJFdxC0KecD9Q,3836
100
+ pixeltable/metadata/__init__.py,sha256=uxufhyUkogiMhztO3OoS59Rqs5pWOHqFJqPO5mZfrVA,2172
101
+ pixeltable/metadata/converters/convert_10.py,sha256=J1_r7LNNAWTdb042AwqFpJ4sEB-i4qhUdk5iOjcZk34,719
102
+ pixeltable/metadata/converters/convert_12.py,sha256=Ci-qyZW1gqci-8wnjeOB5afdq7KTuN-hVSV9OqSPx8g,162
103
+ pixeltable/metadata/converters/convert_13.py,sha256=yFR6lD3pOrZ46ZQBFKYvxiIYa7rRxh46Bsq7yiCBNak,1356
104
+ pixeltable/metadata/converters/convert_14.py,sha256=o4Dwu5wujJYILN-2chg3xCSUsh4cnn0sImv6rc75rSM,388
105
+ pixeltable/metadata/converters/convert_15.py,sha256=N-Lt3OdOrUprN-z1gFcxniZgAtZ7jzup_YUZzXX6EtY,1709
106
+ pixeltable/metadata/converters/convert_16.py,sha256=SvcWOYgLwRw_gLTnLbCSI9f2cpdkXazYOmmtJUOOzv4,476
107
+ pixeltable/metadata/converters/util.py,sha256=AcYs3yUICl93y8whf0pkeWZoCzE4JuUMafmcYMyJUCY,2618
108
+ pixeltable/metadata/schema.py,sha256=WJZ1YPgS88rFElXbjYgDhcrI4VReR1I9VPOnTkoHvoI,8418
109
+ pixeltable/plan.py,sha256=MXWgwQXD40GB57xQiq_wjXF3OL0XTEjjhQslMfFTt3w,32831
110
+ pixeltable/store.py,sha256=UDn-UMYuL6dTUym3yFsVhv9hUtnP_QtzhDJzsFInApc,18853
111
+ pixeltable/tool/create_test_db_dump.py,sha256=AYYHJKSDx7CMjNH6WUoOlUCICFZKRqQn7sSL3jKkDEw,10727
112
+ pixeltable/tool/create_test_video.py,sha256=OLfccymYReIpzE8osZn4rQvLXxxiPC_l0vc06U74hVM,2899
113
+ pixeltable/tool/embed_udf.py,sha256=llHUhjGnCMp7Wyz7eHgKZV2v6o2ZWSgLQKscESuHK_o,269
114
+ pixeltable/type_system.py,sha256=oXnDVoP90ic6WSTF_DcgWDLx0MYKEU0ggGTesAKahic,29505
115
+ pixeltable/utils/__init__.py,sha256=UYlrf6TIWJT0g-Hac0b34-dEk478B5Qx8dGco34YlIk,439
116
+ pixeltable/utils/arrow.py,sha256=83_7aG5UR2qtTktw_otLkQs-RQbLk0VVM0JLJkbweNU,3692
117
+ pixeltable/utils/coco.py,sha256=ISpFBhR4eO1jOcg_SPb0thVI4KdS6H0RyNQauZIA5A4,7287
118
+ pixeltable/utils/code.py,sha256=AOw1u2r8_DQXpX-lxJhyHWARGrCRDXOJHFVgKOi54Uc,1231
119
+ pixeltable/utils/documents.py,sha256=Q7e5U2Hk0go83MdKzD_MIiMscwbcFsLMgRw2IU_vQF4,2213
120
+ pixeltable/utils/filecache.py,sha256=UoNONG2VaAc2IBB0e3sQdsvyOPOes2XSDc5_CsA4qek,7839
121
+ pixeltable/utils/help.py,sha256=cCnxJ4VP9MJ57iDqExmnDcM-JG3a1lw_q7g-D7bpSVI,252
122
+ pixeltable/utils/http_server.py,sha256=WQ5ILMzlz4TlwI9j5YqAPgEZyhrN1GytMNDbLD9occk,2422
123
+ pixeltable/utils/media_store.py,sha256=x71wnJDZDHcdd13VCfL4AkHQ6IJB41gNA-zBvXJwFos,3116
124
+ pixeltable/utils/pytorch.py,sha256=BR4tgfUWw-2rwWTOgzXj5qdMBpe1Arpp5SK4ax6jjpk,3483
125
+ pixeltable/utils/s3.py,sha256=rkanuhk9DWvSfmbOLQW1j1Iov4sl2KhxGGKN-AJ8LSE,432
126
+ pixeltable/utils/sql.py,sha256=5n5_OmXAGtqFdL6z5XvgnU-vlx6Ba6f1WJrO1ZwUle8,765
127
+ pixeltable/utils/transactional_directory.py,sha256=UGzCrGtLR3hEEf8sYGuWBzLVFAEQml3vdIavigWeTBM,1349
128
+ pixeltable-0.2.9.dist-info/LICENSE,sha256=0UNMmwuqWPC0xDY1NWMm4uNJ2_MyA1pnTNRgQTvuBiQ,746
129
+ pixeltable-0.2.9.dist-info/METADATA,sha256=YInl_cYNpCosk7ENDDVZ9GGfFKCuKB3ZyWsGiezTrRI,9806
130
+ pixeltable-0.2.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
131
+ pixeltable-0.2.9.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- from .remote import Remote
@@ -1,452 +0,0 @@
1
- import logging
2
- import os
3
- from dataclasses import dataclass
4
- from pathlib import Path
5
- from typing import Any, Iterator, Optional
6
- from xml.etree import ElementTree
7
-
8
- import PIL.Image
9
- import label_studio_sdk
10
- import more_itertools
11
- from requests.exceptions import HTTPError
12
-
13
- import pixeltable as pxt
14
- import pixeltable.env as env
15
- import pixeltable.exceptions as excs
16
- from pixeltable import Table
17
- from pixeltable.datatransfer.remote import Remote
18
- from pixeltable.utils import coco
19
-
20
- _logger = logging.getLogger('pixeltable')
21
-
22
-
23
@env.register_client('label_studio')
def _(api_key: str, url: str) -> label_studio_sdk.Client:
    """Build a Label Studio SDK client for the given API key and server URL."""
    client = label_studio_sdk.Client(api_key=api_key, url=url)
    return client
26
-
27
-
28
def _label_studio_client() -> label_studio_sdk.Client:
    """Return the client that the Pixeltable environment holds under the name 'label_studio'."""
    pxt_env = env.Env.get()
    return pxt_env.get_client('label_studio')
30
-
31
-
32
class LabelStudioProject(Remote):
    """
    A [`Remote`][pixeltable.datatransfer.Remote] that represents a Label Studio project, providing functionality
    for synchronizing between a Pixeltable table and a Label Studio project.

    The API key and URL for a valid Label Studio server must be specified in Pixeltable config. Either:

    * Set the `LABEL_STUDIO_API_KEY` and `LABEL_STUDIO_URL` environment variables; or
    * Specify `api_key` and `url` fields in the `label-studio` section of `$PIXELTABLE_HOME/config.yaml`.
    """
    # TODO(aaron-siegel): Add link in docstring to a Label Studio howto

    def __init__(self, project_id: int):
        """
        Args:
            project_id: The id of the Label Studio project this object refers to.
        """
        self.project_id = project_id
        # Populated lazily by the `project` property.
        self._project: Optional[label_studio_sdk.project.Project] = None

    @classmethod
    def create(cls, title: str, label_config: str, **kwargs: Any) -> 'LabelStudioProject':
        """
        Creates a new Label Studio project, using the Label Studio client configured in Pixeltable.

        Args:
            title: The title of the project.
            label_config: The Label Studio project configuration, in XML format.
            **kwargs: Additional keyword arguments for the new project; these will be passed to `start_project`
                in the Label Studio SDK.

        Raises:
            excs.Error: If `label_config` is not a configuration this class can handle.
        """
        # Check that the config is valid before creating the project
        cls._parse_project_config(label_config)
        project = _label_studio_client().start_project(title=title, label_config=label_config, **kwargs)
        project_id = project.get_params()['id']
        return cls(project_id)

    @property
    def project(self) -> label_studio_sdk.project.Project:
        """The `Project` object corresponding to this Label Studio project."""
        if self._project is None:
            try:
                self._project = _label_studio_client().get_project(self.project_id)
            except HTTPError as exc:
                raise excs.Error(f'Could not locate Label Studio project: {self.project_id} '
                                 '(cannot connect to server or project no longer exists)') from exc
        return self._project

    @property
    def project_params(self) -> dict[str, Any]:
        """The parameters of this Label Studio project."""
        return self.project.get_params()

    @property
    def project_title(self) -> str:
        """The title of this Label Studio project."""
        return self.project_params['title']

    @property
    def _project_config(self) -> '_LabelStudioConfig':
        """The parsed and validated label config of this project."""
        return self._parse_project_config(self.project_params['label_config'])

    def get_export_columns(self) -> dict[str, pxt.ColumnType]:
        """
        The data keys and preannotation fields specified in this Label Studio project.
        """
        return self._project_config.export_columns

    def get_import_columns(self) -> dict[str, pxt.ColumnType]:
        """
        Always contains a single entry:

        ```
        {"annotations": pxt.JsonType(nullable=True)}
        ```
        """
        return {ANNOTATIONS_COLUMN: pxt.JsonType(nullable=True)}

    def sync(self, t: Table, col_mapping: dict[str, str], export_data: bool, import_data: bool) -> None:
        """
        Synchronizes table `t` with this Label Studio project.

        Args:
            t: The table to synchronize.
            col_mapping: Maps column names of `t` to column names of this project.
            export_data: If True, create Label Studio tasks for rows of `t` that have none yet.
            import_data: If True, write the annotations of the existing tasks back into `t`.
        """
        _logger.info(f'Syncing Label Studio project "{self.project_title}" with table `{t.get_name()}`'
                     f' (export: {export_data}, import: {import_data}).')
        # Collect all existing tasks into a dict with entries `rowid: task`
        # NOTE(review): this snapshot is taken before the export step, so tasks created or
        # deleted by the export are not reflected in the import step below - confirm intended.
        tasks = {tuple(task['meta']['rowid']): task for task in self._fetch_all_tasks()}
        if export_data:
            self._create_tasks_from_table(t, col_mapping, tasks)
        if import_data:
            self._update_table_from_tasks(t, col_mapping, tasks)

    def _fetch_all_tasks(self) -> Iterator[dict]:
        """
        Yields every task of the project that carries a `rowid` in its `meta`, page by page.

        Tasks without a `rowid` are skipped; their number is reported in a single warning
        once all pages have been read.
        """
        page = 1
        unknown_task_count = 0
        while True:
            result = self.project.get_paginated_tasks(page=page, page_size=_PAGE_SIZE)
            if result.get('end_pagination'):
                break
            for task in result['tasks']:
                # Tolerate tasks with a missing or null `meta`; they simply have no rowid.
                rowid = (task.get('meta') or {}).get('rowid')
                if rowid is None:
                    unknown_task_count += 1
                else:
                    yield task
            page += 1
        if unknown_task_count > 0:
            _logger.warning(
                f'Skipped {unknown_task_count} unrecognized task(s) when syncing Label Studio project "{self.project_title}".'
            )

    def _update_table_from_tasks(self, t: Table, col_mapping: dict[str, str], tasks: dict[tuple, dict]) -> None:
        """
        Writes the annotations of `tasks` into the column of `t` that is mapped to
        `ANNOTATIONS_COLUMN`, using one batch update keyed by `_rowid`.
        """
        # `col_mapping` is guaranteed to be a one-to-one dict whose values are a superset
        # of `get_import_columns`
        assert ANNOTATIONS_COLUMN in col_mapping.values()
        annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
        updates = [
            {
                '_rowid': task['meta']['rowid'],
                # Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
                # in order to properly handle the scenario where existing annotations have been deleted in
                # Label Studio.
                annotations_column: task[ANNOTATIONS_COLUMN] if len(task[ANNOTATIONS_COLUMN]) > 0 else None
            }
            for task in tasks.values()
        ]
        if len(updates) > 0:
            _logger.info(
                f'Updating table `{t.get_name()}`, column `{annotations_column}` with {len(updates)} total annotations.'
            )
            t.batch_update(updates)
            annotations_count = sum(len(task[ANNOTATIONS_COLUMN]) for task in tasks.values())
            print(f'Synced {annotations_count} annotation(s) from {len(updates)} existing task(s) in {self}.')

    def _create_tasks_from_table(self, t: Table, col_mapping: dict[str, str], existing_tasks: dict[tuple, dict]) -> None:
        """
        Creates tasks for the rows of `t` that have no task yet, choosing between upload by
        file post (exactly one mapped data column, of media type) and creation by URL.
        """
        t_col_types = t.column_types()
        config = self._project_config

        # Columns in `t` that map to Label Studio data keys
        t_data_cols = [
            t_col_name for t_col_name, r_col_name in col_mapping.items()
            if r_col_name in config.data_keys
        ]

        # Columns in `t` that map to `rectanglelabels` preannotations
        t_rl_cols = [
            t_col_name for t_col_name, r_col_name in col_mapping.items()
            if r_col_name in config.rectangle_labels
        ]

        # Destinations for `rectanglelabels` preannotations, aligned index-by-index with
        # `t_rl_cols`. Looking each one up by its mapped name keeps the pairing correct even
        # when `col_mapping` and the project config list the labels in a different order or
        # when only some of the configured labels are mapped.
        rl_info = [config.rectangle_labels[col_mapping[t_col_name]] for t_col_name in t_rl_cols]

        _logger.debug('`t_data_cols`: %s', t_data_cols)
        _logger.debug('`t_rl_cols`: %s', t_rl_cols)
        _logger.debug('`rl_info`: %s', rl_info)

        if len(t_data_cols) == 1 and t_col_types[t_data_cols[0]].is_media_type():
            # With a single media column, we can post local files to Label Studio using
            # the file transfer API.
            self._create_tasks_by_post(t, col_mapping, existing_tasks, t_rl_cols, rl_info, t_data_cols[0])
        else:
            # Either a single non-media column or multiple columns. Either way, we can't
            # use the file upload API and need to rely on externally accessible URLs for
            # media columns.
            self._create_tasks_by_urls(t, col_mapping, existing_tasks, t_data_cols, t_col_types, t_rl_cols, rl_info)

    def _create_tasks_by_post(
            self,
            t: Table,
            col_mapping: dict[str, str],
            existing_tasks: dict[tuple, dict],
            t_rl_cols: list[str],
            rl_info: list['_RectangleLabel'],
            media_col_name: str
    ) -> None:
        """
        Creates one task per row without a task by uploading the row's media file, then
        attaches the row's rowid and preannotations to it. Finishes by deleting stale tasks.

        Args:
            t: The table being synchronized.
            col_mapping: Maps column names of `t` to column names of this project.
            existing_tasks: Tasks already present in the project, keyed by rowid.
            t_rl_cols: Columns of `t` that hold `rectanglelabels` preannotations.
            rl_info: The `_RectangleLabel` for each entry of `t_rl_cols`, in the same order.
            media_col_name: The single media column of `t` that is exported.
        """
        is_stored = t[media_col_name].col.is_stored
        # If it's a stored column, we can use `localpath`
        localpath_col_opt = [t[media_col_name].localpath] if is_stored else []
        # Select the media column, rectanglelabels columns, and localpath (if appropriate)
        rows = t.select(t[media_col_name], *[t[col] for col in t_rl_cols], *localpath_col_opt)
        tasks_created = 0
        row_ids_in_pxt: set[tuple] = set()

        for row in rows._exec():
            # NOTE(review): slot indices are read inside the loop, as before; presumably they
            # are only valid once execution has started - confirm before hoisting.
            media_col_idx = rows._select_list_exprs[0].slot_idx
            rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[1: 1 + len(t_rl_cols)]]
            row_ids_in_pxt.add(row.rowid)
            if row.rowid not in existing_tasks:
                # Upload the media file to Label Studio
                task_id: int
                if is_stored:
                    # There is an existing localpath; use it!
                    localpath_col_idx = rows._select_list_exprs[-1].slot_idx
                    file = Path(row.vals[localpath_col_idx])
                    task_id = self.project.import_tasks(file)[0]
                else:
                    # No localpath; create a temp file and upload it
                    assert isinstance(row.vals[media_col_idx], PIL.Image.Image)
                    file = env.Env.get().create_tmp_path(extension='.png')
                    try:
                        row.vals[media_col_idx].save(file, format='png')
                        task_id = self.project.import_tasks(file)[0]
                    finally:
                        # Remove the temp file even if saving or uploading failed.
                        Path(file).unlink(missing_ok=True)

                # Update the task with `rowid` metadata
                self.project.update_task(task_id, meta={'rowid': row.rowid})

                # Convert coco annotations to predictions
                coco_annotations = [row.vals[i] for i in rl_col_idxs]
                _logger.debug('`coco_annotations`: %s', coco_annotations)
                predictions = [
                    self._coco_to_predictions(annotation, col_mapping[t_col_name], info, task_id=task_id)
                    for annotation, t_col_name, info in zip(coco_annotations, t_rl_cols, rl_info)
                ]
                _logger.debug('`predictions`: %s', predictions)
                self.project.create_predictions(predictions)
                tasks_created += 1

        print(f'Created {tasks_created} new task(s) in {self}.')

        self._delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)

    def _create_tasks_by_urls(
            self,
            t: Table,
            col_mapping: dict[str, str],
            existing_tasks: dict[tuple, dict],
            t_data_cols: list[str],
            t_col_types: dict[str, pxt.ColumnType],
            t_rl_cols: list[str],
            rl_info: list['_RectangleLabel']
    ) -> None:
        """
        Creates tasks, in pages of `_PAGE_SIZE`, for the rows without a task; media columns
        are exported as file URLs. Finishes by deleting stale tasks.

        Args:
            t: The table being synchronized.
            col_mapping: Maps column names of `t` to column names of this project.
            existing_tasks: Tasks already present in the project, keyed by rowid.
            t_data_cols: Columns of `t` that map to data keys of the project.
            t_col_types: Column types of `t`, keyed by column name.
            t_rl_cols: Columns of `t` that hold `rectanglelabels` preannotations.
            rl_info: The `_RectangleLabel` for each entry of `t_rl_cols`, in the same order.

        Raises:
            excs.Error: If a media value is a `file://` URL.
        """
        # TODO(aaron-siegel): This is just a placeholder (implementation is not complete or tested!)
        selection = [
            t[col_name].fileurl if t_col_types[col_name].is_media_type() else t[col_name]
            for col_name in t_data_cols
        ]
        r_data_cols = [col_mapping[col_name] for col_name in t_data_cols]
        rows = t.select(*selection, *[t[col] for col in t_rl_cols])
        tasks_created = 0
        row_ids_in_pxt: set[tuple] = set()

        def rows_without_task() -> Iterator[Any]:
            # Record the rowid of EVERY row, not just the new ones: `_delete_stale_tasks`
            # treats any task whose rowid was not recorded as stale, so recording only new
            # rows would delete all tasks that already existed.
            for row in rows._exec():
                row_ids_in_pxt.add(row.rowid)
                if row.rowid not in existing_tasks:
                    yield row

        for page in more_itertools.batched(rows_without_task(), n=_PAGE_SIZE):
            data_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[:len(t_data_cols)]]
            rl_col_idxs = [expr.slot_idx for expr in rows._select_list_exprs[len(t_data_cols):]]
            tasks = []

            for row in page:
                data_vals = [row.vals[i] for i in data_col_idxs]
                coco_annotations = [row.vals[i] for i in rl_col_idxs]
                predictions = [
                    self._coco_to_predictions(annotation, col_mapping[t_col_name], info)
                    for annotation, t_col_name, info in zip(coco_annotations, t_rl_cols, rl_info)
                ]

                # Validate media columns
                # TODO Support this if label studio is running on localhost?
                for col_name, val in zip(t_data_cols, data_vals):
                    if (
                        t_col_types[col_name].is_media_type()
                        and isinstance(val, str)
                        and val.startswith("file://")
                    ):
                        raise excs.Error(
                            'Cannot use locally stored media files in a `LabelStudioProject` with more than one '
                            'data key. (This is a limitation of Label Studio; see warning here: '
                            'https://labelstud.io/guide/tasks.html)'
                        )

                tasks.append({
                    # Must be a real mapping: a bare `zip` object is a one-shot iterator
                    # and cannot be serialized as the task's data.
                    'data': dict(zip(r_data_cols, data_vals)),
                    'meta': {'rowid': row.rowid},
                    'predictions': predictions
                })

            self.project.import_tasks(tasks)
            tasks_created += len(tasks)

        print(f'Created {tasks_created} new task(s) in {self}.')

        self._delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)

    def _delete_stale_tasks(
            self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int
    ) -> None:
        """
        Deletes every task of `existing_tasks` whose rowid is not in `row_ids_in_pxt`.

        Args:
            existing_tasks: Tasks that were in the project before this sync, keyed by rowid.
            row_ids_in_pxt: The rowids of all rows seen in the table during this sync.
            tasks_created: Number of tasks created during this sync (used for a sanity check only).
        """
        tasks_to_delete = [
            task['id'] for rowid, task in existing_tasks.items()
            if rowid not in row_ids_in_pxt
        ]
        # Sanity check the math
        assert len(tasks_to_delete) == len(existing_tasks) + tasks_created - len(row_ids_in_pxt)

        if len(tasks_to_delete) > 0:
            self.project.delete_tasks(tasks_to_delete)
            print(f'Deleted {len(tasks_to_delete)} tasks(s) in {self} that are no longer present in Pixeltable.')

    def to_dict(self) -> dict[str, Any]:
        """Serializes this object to a dict; the inverse of `from_dict`."""
        return {'project_id': self.project_id}

    @classmethod
    def from_dict(cls, md: dict[str, Any]) -> 'LabelStudioProject':
        """Recreates an instance from a dict produced by `to_dict`."""
        return cls(md['project_id'])

    def __repr__(self) -> str:
        # Fetches the project parameters from the server to obtain the title.
        name = self.project_title
        return f'LabelStudioProject `{name}`'

    @classmethod
    def _parse_project_config(cls, xml_config: str) -> '_LabelStudioConfig':
        """
        Parses a Label Studio XML config, extracting the names and Pixeltable types of
        all input variables.

        Raises:
            excs.Error: If the root element is not a `View` or the config fails validation.
        """
        root: ElementTree.Element = ElementTree.fromstring(xml_config)
        if root.tag.lower() != 'view':
            raise excs.Error('Root of Label Studio config must be a `View`')
        config = _LabelStudioConfig(
            data_keys=dict(cls._parse_data_keys_config(root)),
            rectangle_labels=dict(cls._parse_rectangle_labels_config(root))
        )
        config.validate()
        return config

    @classmethod
    def _parse_data_keys_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_DataKey']]:
        """
        Yields `(remote column name, _DataKey)` for each direct child of `root` whose `value`
        attribute starts with `$`.

        Raises:
            excs.Error: If such an element has no `name` attribute or an unsupported tag.
        """
        for element in root:
            # `startswith` also copes with an empty `value=""`, which indexing would not.
            value = element.attrib.get('value', '')
            if value.startswith('$'):
                remote_col_name = value[1:]
                if 'name' not in element.attrib:
                    raise excs.Error(f'Data key is missing `name` attribute: `{remote_col_name}`')
                element_type = _LS_TAG_MAP.get(element.tag.lower())
                if element_type is None:
                    raise excs.Error(
                        f'Unsupported Label Studio data type: `{element.tag}` (in data key `{remote_col_name}`)'
                    )
                yield remote_col_name, _DataKey(element.attrib['name'], element_type)

    @classmethod
    def _parse_rectangle_labels_config(cls, root: ElementTree.Element) -> Iterator[tuple[str, '_RectangleLabel']]:
        """
        Yields `(name, _RectangleLabel)` for each direct child of `root` with tag `rectanglelabels`
        (case-insensitive).

        Raises:
            excs.Error: If an element lacks `name` or `toName`, or if one of its labels is not
                a COCO object name.
        """
        valid_labels = set(coco.COCO_2017_CATEGORIES.values())
        for element in root:
            if element.tag.lower() == 'rectanglelabels':
                # Report missing attributes as a config error, like the data-key parser does,
                # instead of leaking a bare KeyError.
                for attr in ('name', 'toName'):
                    if attr not in element.attrib:
                        raise excs.Error(f'`rectanglelabels` element is missing `{attr}` attribute')
                name = element.attrib['name']
                to_name = element.attrib['toName']
                labels = [
                    child.attrib['value']
                    for child in element if child.tag.lower() == 'label'
                ]
                for label in labels:
                    if label not in valid_labels:
                        raise excs.Error(f'Label in `rectanglelabels` config is not a valid COCO object name: {label}')
                yield name, _RectangleLabel(to_name=to_name, labels=labels)

    @classmethod
    def _coco_to_predictions(
            cls,
            coco_annotations: dict[str, Any],
            from_name: str,
            rl_info: '_RectangleLabel',
            task_id: Optional[int] = None
    ) -> dict[str, Any]:
        """
        Converts a dict of COCO annotations into a Label Studio prediction.

        Args:
            coco_annotations: Dict with an `image` entry (`width`, `height`) and an
                `annotations` list whose entries have `bbox` and `category`.
            from_name: Used as the `from_name` of every result.
            rl_info: Supplies the `to_name` of every result and the labels to keep.
            task_id: If given, included in the returned dict under `task`.

        Returns:
            `{'result': [...]}`, plus a `task` entry when `task_id` is given. Annotations whose
            category is unknown or not among `rl_info.labels` are left out.
        """
        width = coco_annotations['image']['width']
        height = coco_annotations['image']['height']
        result = []
        # `i` counts all annotations, including skipped ones, so result ids keep their
        # original positions.
        for i, entry in enumerate(coco_annotations['annotations']):
            # Unknown category ids map to None, which never matches a configured label.
            label = coco.COCO_2017_CATEGORIES.get(entry['category'])
            # include only the COCO labels that match a rectanglelabel name
            if label not in rl_info.labels:
                continue
            result.append({
                'id': f'result_{i}',
                'type': 'rectanglelabels',
                'from_name': from_name,
                'to_name': rl_info.to_name,
                'image_rotation': 0,
                'original_width': width,
                'original_height': height,
                'value': {
                    'rotation': 0,
                    # Label Studio expects image coordinates as % of image dimensions
                    'x': entry['bbox'][0] * 100.0 / width,
                    'y': entry['bbox'][1] * 100.0 / height,
                    'width': entry['bbox'][2] * 100.0 / width,
                    'height': entry['bbox'][3] * 100.0 / height,
                    'rectanglelabels': [label]
                }
            })
        if task_id is not None:
            return {'task': task_id, 'result': result}
        else:
            return {'result': result}
410
-
411
-
412
@dataclass(frozen=True)
class _DataKey:
    """A data key declared in a Label Studio project config."""
    name: str  # The 'name' attribute of the data key; may differ from the field name
    column_type: pxt.ColumnType  # Pixeltable column type associated with the key
416
-
417
-
418
@dataclass(frozen=True)
class _RectangleLabel:
    """The settings of one `rectanglelabels` element of a Label Studio project config."""
    to_name: str  # The element's `toName` attribute
    labels: list[str]  # The `value` attributes of the element's `label` children
422
-
423
-
424
@dataclass(frozen=True)
class _LabelStudioConfig:
    """
    The parts of a Label Studio project config that matter for syncing: its data keys and
    its `RectangleLabels` entries, each keyed by name.
    """
    data_keys: dict[str, _DataKey]
    rectangle_labels: dict[str, _RectangleLabel]

    def validate(self) -> None:
        """
        Checks that the `toName` of every RectangleLabels entry is the name of a data key.

        Raises:
            excs.Error: For the first entry whose `toName` matches no data key.
        """
        known_names = {data_key.name for data_key in self.data_keys.values()}
        for rl_name, rl_info in self.rectangle_labels.items():
            if rl_info.to_name in known_names:
                continue
            raise excs.Error(
                f'Invalid Label Studio configuration: `toName` attribute of RectangleLabels `{rl_name}` '
                f'references an unknown data key: `{rl_info.to_name}`'
            )

    @property
    def export_columns(self) -> dict[str, pxt.ColumnType]:
        """
        The data keys with their column types, followed by one `pxt.JsonType()` column per
        RectangleLabels entry; on a name clash the RectangleLabels type wins.
        """
        columns = {}
        for key_name, key_info in self.data_keys.items():
            columns[key_name] = key_info.column_type
        for rl_name in self.rectangle_labels:
            columns[rl_name] = pxt.JsonType()
        return columns
443
-
444
-
445
# Name of the task field, and of the import column, that holds a task's annotations
ANNOTATIONS_COLUMN = 'annotations'
_PAGE_SIZE = 100  # This is the default used in the LS SDK
# Pixeltable column type for each supported Label Studio tag; keys are lowercase tag names
_LS_TAG_MAP = {
    'text': pxt.StringType(),
    'image': pxt.ImageType(),
    'video': pxt.VideoType(),
    'audio': pxt.AudioType()
}