pixeltable 0.2.13__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (51) hide show
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +5 -0
  4. pixeltable/catalog/globals.py +8 -0
  5. pixeltable/catalog/table.py +22 -4
  6. pixeltable/catalog/table_version.py +30 -55
  7. pixeltable/catalog/view.py +1 -1
  8. pixeltable/exec/__init__.py +2 -1
  9. pixeltable/exec/row_update_node.py +61 -0
  10. pixeltable/exec/{sql_scan_node.py → sql_node.py} +120 -56
  11. pixeltable/exprs/__init__.py +1 -1
  12. pixeltable/exprs/expr.py +35 -22
  13. pixeltable/exprs/function_call.py +60 -29
  14. pixeltable/exprs/globals.py +2 -0
  15. pixeltable/exprs/inline_array.py +18 -11
  16. pixeltable/exprs/method_ref.py +63 -0
  17. pixeltable/ext/__init__.py +9 -0
  18. pixeltable/ext/functions/__init__.py +8 -0
  19. pixeltable/ext/functions/whisperx.py +45 -5
  20. pixeltable/ext/functions/yolox.py +60 -14
  21. pixeltable/func/callable_function.py +12 -4
  22. pixeltable/func/expr_template_function.py +1 -1
  23. pixeltable/func/function.py +12 -2
  24. pixeltable/func/function_registry.py +24 -9
  25. pixeltable/func/udf.py +32 -4
  26. pixeltable/functions/__init__.py +1 -1
  27. pixeltable/functions/fireworks.py +33 -0
  28. pixeltable/functions/huggingface.py +96 -6
  29. pixeltable/functions/image.py +226 -41
  30. pixeltable/functions/openai.py +214 -0
  31. pixeltable/functions/string.py +195 -218
  32. pixeltable/functions/timestamp.py +210 -0
  33. pixeltable/functions/together.py +106 -0
  34. pixeltable/functions/video.py +2 -2
  35. pixeltable/functions/whisper.py +32 -0
  36. pixeltable/io/__init__.py +1 -1
  37. pixeltable/io/globals.py +133 -1
  38. pixeltable/io/pandas.py +52 -27
  39. pixeltable/metadata/__init__.py +1 -1
  40. pixeltable/metadata/converters/convert_18.py +39 -0
  41. pixeltable/metadata/notes.py +10 -0
  42. pixeltable/plan.py +76 -1
  43. pixeltable/tool/create_test_db_dump.py +3 -4
  44. pixeltable/tool/doc_plugins/griffe.py +4 -0
  45. pixeltable/type_system.py +15 -14
  46. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/METADATA +1 -1
  47. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/RECORD +50 -45
  48. pixeltable/exprs/image_member_access.py +0 -96
  49. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/LICENSE +0 -0
  50. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/WHEEL +0 -0
  51. {pixeltable-0.2.13.dist-info → pixeltable-0.2.14.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,210 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs) for `TimestampType`.
3
+
4
+ Usage example:
5
+ ```python
6
+ import pixeltable as pxt
7
+
8
+ t = pxt.get_table(...)
9
+ t.select(t.timestamp_col.year, t.timestamp_col.weekday()).collect()
10
+ ```
11
+ """
12
+
13
+ from datetime import datetime
14
+ from typing import Optional
15
+
16
+ import pixeltable.func as func
17
+ from pixeltable.utils.code import local_public_names
18
+
19
+
20
+ @func.udf(is_method=True)
21
+ def year(self: datetime) -> int:
22
+ """
23
+ Between [`MINYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MINYEAR) and
24
+ [`MAXYEAR`](https://docs.python.org/3/library/datetime.html#datetime.MAXYEAR) inclusive.
25
+
26
+ Equivalent to [`datetime.year`](https://docs.python.org/3/library/datetime.html#datetime.datetime.year).
27
+ """
28
+ return self.year
29
+
30
+
31
+ @func.udf(is_method=True)
32
+ def month(self: datetime) -> int:
33
+ """
34
+ Between 1 and 12 inclusive.
35
+
36
+ Equivalent to [`datetime.month`](https://docs.python.org/3/library/datetime.html#datetime.datetime.month).
37
+ """
38
+ return self.month
39
+
40
+
41
+ @func.udf(is_method=True)
42
+ def day(self: datetime) -> int:
43
+ """
44
+ Between 1 and the number of days in the given month of the given year.
45
+
46
+ Equivalent to [`datetime.day`](https://docs.python.org/3/library/datetime.html#datetime.datetime.day).
47
+ """
48
+ return self.day
49
+
50
+
51
+ @func.udf(is_method=True)
52
+ def hour(self: datetime) -> int:
53
+ """
54
+ Between 0 and 23 inclusive.
55
+
56
+ Equivalent to [`datetime.hour`](https://docs.python.org/3/library/datetime.html#datetime.datetime.hour).
57
+ """
58
+ return self.hour
59
+
60
+
61
+ @func.udf(is_method=True)
62
+ def minute(self: datetime) -> int:
63
+ """
64
+ Between 0 and 59 inclusive.
65
+
66
+ Equivalent to [`datetime.minute`](https://docs.python.org/3/library/datetime.html#datetime.datetime.minute).
67
+ """
68
+ return self.minute
69
+
70
+
71
+ @func.udf(is_method=True)
72
+ def second(self: datetime) -> int:
73
+ """
74
+ Between 0 and 59 inclusive.
75
+
76
+ Equivalent to [`datetime.second`](https://docs.python.org/3/library/datetime.html#datetime.datetime.second).
77
+ """
78
+ return self.second
79
+
80
+
81
+ @func.udf(is_method=True)
82
+ def microsecond(self: datetime) -> int:
83
+ """
84
+ Between 0 and 999999 inclusive.
85
+
86
+ Equivalent to [`datetime.microsecond`](https://docs.python.org/3/library/datetime.html#datetime.datetime.microsecond).
87
+ """
88
+ return self.microsecond
89
+
90
+
91
+ @func.udf(is_method=True)
92
+ def weekday(self: datetime) -> int:
93
+ """
94
+ Between 0 (Monday) and 6 (Sunday) inclusive.
95
+
96
+ Equivalent to [`datetime.weekday()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.weekday).
97
+ """
98
+ return self.weekday()
99
+
100
+ @func.udf(is_method=True)
101
+ def isoweekday(self: datetime) -> int:
102
+ """
103
+ Return the day of the week as an integer, where Monday is 1 and Sunday is 7.
104
+
105
+ Equivalent to [`datetime.isoweekday()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.isoweekday).
106
+ """
107
+ return self.isoweekday()
108
+
109
+
110
+ @func.udf(is_method=True)
111
+ def isocalendar(self: datetime) -> dict:
112
+ """
113
+ Return a dictionary with three entries: `'year'`, `'week'`, and `'weekday'`.
114
+
115
+ Equivalent to
116
+ [`datetime.isocalendar()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.isocalendar).
117
+ """
118
+ iso_year, iso_week, iso_weekday = self.isocalendar()
119
+ return {'year': iso_year, 'week': iso_week, 'weekday': iso_weekday}
120
+
121
+
122
+ @func.udf(is_method=True)
123
+ def isoformat(self: datetime, sep: str = 'T', timespec: str = 'auto') -> str:
124
+ """
125
+ Return a string representing the date and time in ISO 8601 format.
126
+
127
+ Equivalent to [`datetime.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.isoformat).
128
+
129
+ Args:
130
+ sep: Separator between date and time.
131
+ timespec: The number of additional components of the time to include in the output. See the [`datetime.isoformat()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.isoformat) documentation for more details.
132
+ """
133
+ return self.isoformat(sep=sep, timespec=timespec)
134
+
135
+
136
+ @func.udf(is_method=True)
137
+ def strftime(self: datetime, format: str) -> str:
138
+ """
139
+ Return a string representing the date and time, controlled by an explicit format string.
140
+
141
+ Equivalent to [`datetime.strftime()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.strftime).
142
+
143
+ Args:
144
+ format: The format string to control the output. For a complete list of formatting directives, see [`strftime()` and `strptime()` Behavior](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior).
145
+ """
146
+ return self.strftime(format)
147
+
148
+
149
+ # @func.udf
150
+ # def date(self: datetime) -> datetime:
151
+ # """
152
+ # Return the date part of the datetime.
153
+ #
154
+ # Equivalent to [`datetime.date()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.date).
155
+ # """
156
+ # d = self.date()
157
+ # return datetime(d.year, d.month, d.day)
158
+ #
159
+ #
160
+ # @func.udf
161
+ # def time(self: datetime) -> datetime:
162
+ # """
163
+ # Return the time part of the datetime, with microseconds set to 0.
164
+ #
165
+ # Equivalent to [`datetime.time()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.time).
166
+ # """
167
+ # t = self.time()
168
+ # return datetime(1, 1, 1, t.hour, t.minute, t.second, t.microsecond)
169
+
170
+
171
+ @func.udf(is_method=True)
172
+ def replace(
173
+ self: datetime, year: Optional[int] = None, month: Optional[int] = None, day: Optional[int] = None,
174
+ hour: Optional[int] = None, minute: Optional[int] = None, second: Optional[int] = None,
175
+ microsecond: Optional[int] = None) -> datetime:
176
+ """
177
+ Return a datetime with the same attributes, except for those attributes given new values by whichever keyword
178
+ arguments are specified.
179
+
180
+ Equivalent to [`datetime.replace()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.replace).
181
+ """
182
+ kwargs = {k: v for k, v in locals().items() if k != 'self' and v is not None}
183
+ return self.replace(**kwargs)
184
+
185
+
186
+ @func.udf(is_method=True)
187
+ def toordinal(self: datetime) -> int:
188
+ """
189
+ Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 has ordinal 1.
190
+
191
+ Equivalent to [`datetime.toordinal()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.toordinal).
192
+ """
193
+ return self.toordinal()
194
+
195
+
196
+ @func.udf(is_method=True)
197
+ def posix_timestamp(self: datetime) -> float:
198
+ """
199
+ Return POSIX timestamp corresponding to the datetime instance.
200
+
201
+ Equivalent to [`datetime.timestamp()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.timestamp).
202
+ """
203
+ return self.timestamp()
204
+
205
+
206
+ __all__ = local_public_names(__name__)
207
+
208
+
209
+ def __dir__():
210
+ return __all__
@@ -1,3 +1,10 @@
1
+ """
2
+ Pixeltable [UDFs](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
3
+ that wrap various endpoints from the Together AI API. In order to use them, you must
4
+ first `pip install together` and configure your Together AI credentials, as described in
5
+ the [Working with Together AI](https://pixeltable.readme.io/docs/together-ai) tutorial.
6
+ """
7
+
1
8
  import base64
2
9
  from typing import Optional, TYPE_CHECKING
3
10
 
@@ -41,6 +48,31 @@ def completions(
41
48
  n: Optional[int] = None,
42
49
  safety_model: Optional[str] = None,
43
50
  ) -> dict:
51
+ """
52
+ Generate completions based on a given prompt using a specified model.
53
+
54
+ Equivalent to the Together AI `completions` API endpoint.
55
+ For additional details, see: [https://docs.together.ai/reference/completions-1](https://docs.together.ai/reference/completions-1)
56
+
57
+ __Requirements:__
58
+
59
+ - `pip install together`
60
+
61
+ Args:
62
+ prompt: A string providing context for the model to complete.
63
+ model: The name of the model to query.
64
+
65
+ For details on the other parameters, see: [https://docs.together.ai/reference/completions-1](https://docs.together.ai/reference/completions-1)
66
+
67
+ Returns:
68
+ A dictionary containing the response and other metadata.
69
+
70
+ Examples:
71
+ Add a computed column that applies the model `mistralai/Mixtral-8x7B-v0.1` to an existing Pixeltable column `tbl.prompt`
72
+ of the table `tbl`:
73
+
74
+ >>> tbl['response'] = completions(tbl.prompt, model='mistralai/Mixtral-8x7B-v0.1')
75
+ """
44
76
  return (
45
77
  _together_client()
46
78
  .completions.create(
@@ -80,6 +112,32 @@ def chat_completions(
80
112
  tools: Optional[dict] = None,
81
113
  tool_choice: Optional[dict] = None,
82
114
  ) -> dict:
115
+ """
116
+ Generate chat completions based on a given prompt using a specified model.
117
+
118
+ Equivalent to the Together AI `chat/completions` API endpoint.
119
+ For additional details, see: [https://docs.together.ai/reference/chat-completions-1](https://docs.together.ai/reference/chat-completions-1)
120
+
121
+ __Requirements:__
122
+
123
+ - `pip install together`
124
+
125
+ Args:
126
+ messages: A list of messages comprising the conversation so far.
127
+ model: The name of the model to query.
128
+
129
+ For details on the other parameters, see: [https://docs.together.ai/reference/chat-completions-1](https://docs.together.ai/reference/chat-completions-1)
130
+
131
+ Returns:
132
+ A dictionary containing the response and other metadata.
133
+
134
+ Examples:
135
+ Add a computed column that applies the model `mistralai/Mixtral-8x7B-v0.1` to an existing Pixeltable column `tbl.prompt`
136
+ of the table `tbl`:
137
+
138
+ >>> messages = [{'role': 'user', 'content': tbl.prompt}]
139
+ ... tbl['response'] = chat_completions(messages, model='mistralai/Mixtral-8x7B-v0.1')
140
+ """
83
141
  return (
84
142
  _together_client()
85
143
  .chat.completions.create(
@@ -117,6 +175,29 @@ _embedding_dimensions_cache = {
117
175
 
118
176
  @pxt.udf(batch_size=32, return_type=pxt.ArrayType((None,), dtype=pxt.FloatType()))
119
177
  def embeddings(input: Batch[str], *, model: str) -> Batch[np.ndarray]:
178
+ """
179
+ Query an embedding model for a given string of text.
180
+
181
+ Equivalent to the Together AI `embeddings` API endpoint.
182
+ For additional details, see: [https://docs.together.ai/reference/embeddings-2](https://docs.together.ai/reference/embeddings-2)
183
+
184
+ __Requirements:__
185
+
186
+ - `pip install together`
187
+
188
+ Args:
189
+ input: A string providing the text for the model to embed.
190
+ model: The name of the embedding model to use.
191
+
192
+ Returns:
193
+ An array representing the application of the given embedding to `input`.
194
+
195
+ Examples:
196
+ Add a computed column that applies the model `togethercomputer/m2-bert-80M-8k-retrieval`
197
+ to an existing Pixeltable column `tbl.text` of the table `tbl`:
198
+
199
+ >>> tbl['response'] = embeddings(tbl.text, model='togethercomputer/m2-bert-80M-8k-retrieval')
200
+ """
120
201
  result = _together_client().embeddings.create(input=input, model=model)
121
202
  return [np.array(data.embedding, dtype=np.float64) for data in result.data]
122
203
 
@@ -141,6 +222,31 @@ def image_generations(
141
222
  width: Optional[int] = None,
142
223
  negative_prompt: Optional[str] = None,
143
224
  ) -> PIL.Image.Image:
225
+ """
226
+ Generate images based on a given prompt using a specified model.
227
+
228
+ Equivalent to the Together AI `images/generations` API endpoint.
229
+ For additional details, see: [https://docs.together.ai/reference/post_images-generations](https://docs.together.ai/reference/post_images-generations)
230
+
231
+ __Requirements:__
232
+
233
+ - `pip install together`
234
+
235
+ Args:
236
+ prompt: A description of the desired images.
237
+ model: The model to use for image generation.
238
+
239
+ For details on the other parameters, see: [https://docs.together.ai/reference/post_images-generations](https://docs.together.ai/reference/post_images-generations)
240
+
241
+ Returns:
242
+ The generated image.
243
+
244
+ Examples:
245
+ Add a computed column that applies the model `runwayml/stable-diffusion-v1-5`
246
+ to an existing Pixeltable column `tbl.prompt` of the table `tbl`:
247
+
248
+ >>> tbl['response'] = image_generations(tbl.prompt, model='runwayml/stable-diffusion-v1-5')
249
+ """
144
250
  # TODO(aaron-siegel): Decompose CPU/GPU ops into separate functions
145
251
  result = _together_client().images.generate(
146
252
  prompt=prompt, model=model, steps=steps, seed=seed, height=height, width=width, negative_prompt=negative_prompt
@@ -96,7 +96,7 @@ _extract_audio_param_types = [
96
96
  ]
97
97
 
98
98
 
99
- @func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types)
99
+ @func.udf(return_type=ts.AudioType(nullable=True), param_types=_extract_audio_param_types, is_method=True)
100
100
  def extract_audio(
101
101
  video_path: str, stream_idx: int = 0, format: str = 'wav', codec: Optional[str] = None
102
102
  ) -> Optional[str]:
@@ -128,7 +128,7 @@ def extract_audio(
128
128
  return output_filename
129
129
 
130
130
 
131
- @func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.VideoType(nullable=False)])
131
+ @func.udf(return_type=ts.JsonType(nullable=False), param_types=[ts.VideoType(nullable=False)], is_method=True)
132
132
  def get_metadata(video: str) -> dict:
133
133
  """
134
134
  Gets various metadata associated with a video file and returns it as a dictionary.
@@ -1,3 +1,11 @@
1
+ """
2
+ Pixeltable [UDF](https://pixeltable.readme.io/docs/user-defined-functions-udfs)
3
+ that wraps the OpenAI Whisper library.
4
+
5
+ This UDF will cause Pixeltable to invoke the relevant model locally. In order to use it, you must
6
+ first `pip install openai-whisper`.
7
+ """
8
+
1
9
  from typing import TYPE_CHECKING, Optional
2
10
 
3
11
  import pixeltable as pxt
@@ -39,6 +47,30 @@ def transcribe(
39
47
  append_punctuations: str = '"\'.。,,!!??::”)]}、',
40
48
  decode_options: Optional[dict] = None,
41
49
  ) -> dict:
50
+ """
51
+ Transcribe an audio file using Whisper.
52
+
53
+ This UDF runs a transcription model _locally_ using the Whisper library,
54
+ equivalent to the Whisper `transcribe` function, as described in the
55
+ [Whisper library documentation](https://github.com/openai/whisper).
56
+
57
+ __Requirements:__
58
+
59
+ - `pip install openai-whisper`
60
+
61
+ Args:
62
+ audio: The audio file to transcribe.
63
+ model: The name of the model to use for transcription.
64
+
65
+ Returns:
66
+ A dictionary containing the transcription and various other metadata.
67
+
68
+ Examples:
69
+ Add a computed column that applies the model `base.en` to an existing Pixeltable column `tbl.audio`
70
+ of the table `tbl`:
71
+
72
+ >>> tbl['result'] = transcribe(tbl.audio, model='base.en')
73
+ """
42
74
  import torch
43
75
 
44
76
  if decode_options is None:
pixeltable/io/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from .external_store import ExternalStore, SyncStatus
2
- from .globals import create_label_studio_project
2
+ from .globals import create_label_studio_project, import_rows, import_json
3
3
  from .hf_datasets import import_huggingface_dataset
4
4
  from .pandas import import_csv, import_excel, import_pandas
5
5
  from .parquet import import_parquet
pixeltable/io/globals.py CHANGED
@@ -1,5 +1,7 @@
1
- from typing import Any, Optional, Literal
1
+ from typing import Any, Literal, Optional, Union
2
+ import urllib.request
2
3
 
4
+ import pixeltable as pxt
3
5
  import pixeltable.exceptions as excs
4
6
  from pixeltable import Table
5
7
  from pixeltable.io.external_store import SyncStatus
@@ -134,3 +136,133 @@ def create_label_studio_project(
134
136
  return t.sync()
135
137
  else:
136
138
  return SyncStatus.empty()
139
+
140
+
141
+ def import_rows(
142
+ tbl_path: str,
143
+ rows: list[dict[str, Any]],
144
+ *,
145
+ schema_overrides: Optional[dict[str, pxt.ColumnType]] = None,
146
+ primary_key: Optional[Union[str, list[str]]] = None,
147
+ num_retained_versions: int = 10,
148
+ comment: str = ''
149
+ ) -> Table:
150
+ """
151
+ Creates a new `Table` from a list of dictionaries. The dictionaries must be of the form
152
+ `{column_name: value, ...}`. Pixeltable will attempt to infer the schema of the table from the
153
+ supplied data, using the most specific type that can represent all the values in a column.
154
+
155
+ If `schema_overrides` is specified, then for each entry `(column_name, type)` in `schema_overrides`,
156
+ Pixeltable will force the specified column to the specified type (and will not attempt any type inference
157
+ for that column).
158
+
159
+ All column types of the new `Table` will be nullable unless explicitly specified as non-nullable in
160
+ `schema_overrides`.
161
+
162
+ Args:
163
+ tbl_path: The qualified name of the table to create.
164
+ rows: The list of dictionaries to import.
165
+ schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
166
+ as described above.
167
+ primary_key: The primary key of the table (see [`create_table()`][pixeltable.create_table]).
168
+ num_retained_versions: The number of retained versions of the table (see [`create_table()`][pixeltable.create_table]).
169
+ comment: A comment to attach to the table (see [`create_table()`][pixeltable.create_table]).
170
+
171
+ Returns:
172
+ The newly created `Table`.
173
+ """
174
+ if schema_overrides is None:
175
+ schema_overrides = {}
176
+ schema: dict[str, pxt.ColumnType] = {}
177
+ cols_with_nones: set[str] = set()
178
+
179
+ for n, row in enumerate(rows):
180
+ for col_name, value in row.items():
181
+ if col_name in schema_overrides:
182
+ # We do the insertion here; this will ensure that the column order matches the order
183
+ # in which the column names are encountered in the input data, even if `schema_overrides`
184
+ # is specified.
185
+ if col_name not in schema:
186
+ schema[col_name] = schema_overrides[col_name]
187
+ elif value is not None:
188
+ # If `col_name` is not in `schema_overrides`, then we infer its type from the data.
189
+ # The column type will always be nullable by default.
190
+ col_type = pxt.ColumnType.infer_literal_type(value).copy(nullable=True)
191
+ if col_name not in schema:
192
+ schema[col_name] = col_type
193
+ else:
194
+ supertype = pxt.ColumnType.supertype(schema[col_name], col_type)
195
+ if supertype is None:
196
+ raise excs.Error(
197
+ f'Could not infer type of column `{col_name}`; the value in row {n} does not match preceding type {schema[col_name]}: {value!r}\n'
198
+ 'Consider specifying the type explicitly in `schema_overrides`.'
199
+ )
200
+ schema[col_name] = supertype
201
+ else:
202
+ cols_with_nones.add(col_name)
203
+
204
+ extraneous_keys = schema_overrides.keys() - schema.keys()
205
+ if len(extraneous_keys) > 0:
206
+ raise excs.Error(f'The following columns specified in `schema_overrides` are not present in the data: {", ".join(extraneous_keys)}')
207
+
208
+ entirely_none_cols = cols_with_nones - schema.keys()
209
+ if len(entirely_none_cols) > 0:
210
+ # A column can only end up in `entirely_none_cols` if it was not in `schema_overrides` and
211
+ # was not encountered in any row with a non-None value.
212
+ raise excs.Error(
213
+ f'The following columns have no non-null values: {", ".join(entirely_none_cols)}\n'
214
+ 'Consider specifying the type(s) explicitly in `schema_overrides`.'
215
+ )
216
+
217
+ t = pxt.create_table(tbl_path, schema, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)
218
+ t.insert(rows)
219
+ return t
220
+
221
+
222
+ def import_json(
223
+ tbl_path: str,
224
+ filepath_or_url: str,
225
+ *,
226
+ schema_overrides: Optional[dict[str, pxt.ColumnType]] = None,
227
+ primary_key: Optional[Union[str, list[str]]] = None,
228
+ num_retained_versions: int = 10,
229
+ comment: str = '',
230
+ **kwargs: Any
231
+ ) -> Table:
232
+ """
233
+ Creates a new `Table` from a JSON file. This is a convenience method and is equivalent
234
+ to calling `import_rows(tbl_path, json.loads(file_contents, **kwargs), ...)`, where `file_contents`
235
+ is the contents of the specified `filepath_or_url`.
236
+
237
+ Args:
238
+ tbl_path: The name of the table to create.
239
+ filepath_or_url: The path or URL of the JSON file.
240
+ schema_overrides: If specified, then columns in `schema_overrides` will be given the specified types
241
+ (see [`import_rows()`][pixeltable.io.import_rows]).
242
+ primary_key: The primary key of the table (see [`create_table()`][pixeltable.create_table]).
243
+ num_retained_versions: The number of retained versions of the table (see [`create_table()`][pixeltable.create_table]).
244
+ comment: A comment to attach to the table (see [`create_table()`][pixeltable.create_table]).
245
+ kwargs: Additional keyword arguments to pass to `json.loads`.
246
+
247
+ Returns:
248
+ The newly created `Table`.
249
+ """
250
+ import json
251
+ import urllib.parse
252
+ import urllib.request
253
+
254
+ # TODO Consolidate this logic with other places where files/URLs are parsed
255
+ parsed = urllib.parse.urlparse(filepath_or_url)
256
+ if len(parsed.scheme) <= 1 or parsed.scheme == 'file':
257
+ # local file path
258
+ if len(parsed.scheme) <= 1:
259
+ filepath = filepath_or_url
260
+ else:
261
+ filepath = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
262
+ with open(filepath) as fp:
263
+ contents = fp.read()
264
+ else:
265
+ # URL
266
+ contents = urllib.request.urlopen(filepath_or_url).read()
267
+ data = json.loads(contents, **kwargs)
268
+ return import_rows(tbl_path, data, schema_overrides=schema_overrides, primary_key=primary_key, num_retained_versions=num_retained_versions, comment=comment)