hf2vespa 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hf2vespa/cli.py CHANGED
@@ -287,20 +287,26 @@ def feed(
287
287
 
288
288
  Examples:
289
289
 
290
- # Basic usage
291
- $ hf2vespa feed glue --split test --config ax
290
+ # Basic streaming
291
+ hf2vespa feed mteb/msmarco-v2 --config corpus --split corpus --limit 10
292
292
 
293
- # Filter columns
294
- $ hf2vespa feed glue --split test --config ax --include premise --include hypothesis
293
+ # Rename columns
294
+ hf2vespa feed mteb/msmarco-v2 --config corpus --split corpus --rename _id:id --limit 5
295
+
296
+ # Filter specific columns
297
+ hf2vespa feed mteb/msmarco-v2 --config corpus --split corpus --include title --include text --limit 5
295
298
 
296
299
  # Custom namespace and doctype
297
- $ hf2vespa feed squad --namespace wiki --doctype article
300
+ hf2vespa feed mteb/msmarco-v2 --config corpus --split corpus --namespace search --doctype passage --limit 10
301
+
302
+ # Use config file for complex mappings
303
+ hf2vespa feed mteb/msmarco-v2 --config corpus --split corpus --config-file vespa-config.yaml --limit 10
298
304
 
299
- # Use config file
300
- $ hf2vespa feed glue --config ax --config-file mappings.yaml
305
+ # Skip errors instead of failing
306
+ hf2vespa feed mteb/msmarco-v2 --config corpus --split corpus --on-error skip --limit 10
301
307
 
302
- # Preview first 10 records
303
- $ hf2vespa feed squad --limit 10
308
+ # Pipe directly to Vespa
309
+ hf2vespa feed mteb/msmarco-v2 --config corpus --split corpus --limit 1000 | vespa feed -
304
310
  """
305
311
  feed_impl(
306
312
  dataset=dataset,
@@ -343,14 +349,14 @@ def init(
343
349
 
344
350
  Examples:
345
351
 
346
- # Generate config for a dataset
347
- $ hf2vespa init glue --config ax
352
+ # Generate config for MS MARCO corpus
353
+ hf2vespa init mteb/msmarco-v2 --config corpus -o msmarco-config.yaml
348
354
 
349
355
  # Specify output file
350
- $ hf2vespa init squad --output my-config.yaml
356
+ hf2vespa init mteb/msmarco-v2 --config corpus --output my-config.yaml
351
357
 
352
- # Inspect a specific split
353
- $ hf2vespa init my-dataset --split validation
358
+ # Generate config for Cohere embeddings dataset
359
+ hf2vespa init Cohere/wikipedia-2023-11-embed-multilingual-v3 --config en -o cohere-config.yaml
354
360
  """
355
361
  from hf2vespa.init import init_command
356
362
 
@@ -370,11 +376,18 @@ def install_completion(
370
376
 
371
377
  Detects your shell automatically, or specify explicitly.
372
378
 
379
+ After installation, restart your shell or source your shell config file
380
+ (e.g., source ~/.bashrc).
381
+
373
382
  Examples:
374
383
 
375
- hf2vespa install-completion # Auto-detect shell
376
- hf2vespa install-completion bash # Explicit bash
377
- hf2vespa install-completion zsh # Explicit zsh
384
+ # Auto-detect shell
385
+ hf2vespa install-completion
386
+
387
+ # Explicit shell
388
+ hf2vespa install-completion bash
389
+ hf2vespa install-completion zsh
390
+ hf2vespa install-completion fish
378
391
  """
379
392
  from typer._completion_shared import Shells, install
380
393
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hf2vespa
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: Stream HuggingFace datasets to Vespa JSON format
5
5
  Author-email: Thomas Thoresen <thomas.h.thoresen@gmail.com>
6
6
  License: Apache-2.0
@@ -1,14 +1,14 @@
1
1
  hf2vespa/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
2
2
  hf2vespa/__main__.py,sha256=8swxmM2GAunJQ2Qs91RDMvu28RxSGTiDwuMpgKy4plQ,67
3
- hf2vespa/cli.py,sha256=SHJErdyixpwKFPuKnGRMwAhWnZdcpwtLlroi2gzPNKU,16258
3
+ hf2vespa/cli.py,sha256=wtyTdYlxn8UyZ7OufB6wJ3tyW73fBDVGecNxat1WhKc,17018
4
4
  hf2vespa/config.py,sha256=JmUxnQcEm_XGg4llbAkQnt28HYgWeh2ldyEsk7ZVgMU,3686
5
5
  hf2vespa/converters.py,sha256=sLzOQolvUez2ZymOGS3asiGFDDgqns8chpcoIzMkfME,20685
6
6
  hf2vespa/init.py,sha256=CF4p9LMLCbwV_OehTsu036p8vjtWK8TngXU9fd_v7SM,10866
7
7
  hf2vespa/pipeline.py,sha256=7q9NIF6GhbgcBXx2Jckxh0tcXi7rMJmjcwkXFiPi_tQ,7145
8
8
  hf2vespa/stats.py,sha256=1Os61QpIpDJKthXWE5oWmK_SHx4bZUkcgVIK6t16ppk,1944
9
9
  hf2vespa/utils.py,sha256=KGV-YwKaO6IPtEpb9NnRrHMxfBOMfjZzrtYJJSUCe14,1706
10
- hf2vespa-0.1.1.dist-info/METADATA,sha256=5S4Tv0Na0uAGpoWlWSCE7RtedyGQc_13Fci7jIPuFug,23169
11
- hf2vespa-0.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
12
- hf2vespa-0.1.1.dist-info/entry_points.txt,sha256=R-1FE95nsxKVDqMWPPcXIQT0FL6J4ZWE-Sri68retXE,46
13
- hf2vespa-0.1.1.dist-info/top_level.txt,sha256=Xul9tbYYe1Qw2uYuf-tQiPaPdWRPYsH6K3F2LO6X_lI,9
14
- hf2vespa-0.1.1.dist-info/RECORD,,
10
+ hf2vespa-0.1.2.dist-info/METADATA,sha256=VvjQOSoMorGuIV5gEtob_syvkLfIeBYGWxCauNMsWK4,23169
11
+ hf2vespa-0.1.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
12
+ hf2vespa-0.1.2.dist-info/entry_points.txt,sha256=R-1FE95nsxKVDqMWPPcXIQT0FL6J4ZWE-Sri68retXE,46
13
+ hf2vespa-0.1.2.dist-info/top_level.txt,sha256=Xul9tbYYe1Qw2uYuf-tQiPaPdWRPYsH6K3F2LO6X_lI,9
14
+ hf2vespa-0.1.2.dist-info/RECORD,,