autotouch-cli 0.2.7__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/PKG-INFO +48 -2
  2. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/autotouch_cli.egg-info/PKG-INFO +48 -2
  3. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/docs/research-table/reference/autotouch-cli.md +47 -1
  4. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/pyproject.toml +1 -1
  5. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/smart_table_cli.py +376 -111
  6. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/autotouch_cli.egg-info/SOURCES.txt +0 -0
  7. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/autotouch_cli.egg-info/dependency_links.txt +0 -0
  8. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/autotouch_cli.egg-info/entry_points.txt +0 -0
  9. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/autotouch_cli.egg-info/requires.txt +0 -0
  10. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/autotouch_cli.egg-info/top_level.txt +0 -0
  11. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/__init__.py +0 -0
  12. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/add_column_unique_index.py +0 -0
  13. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/attach_csv_import_leads_to_research_table.py +0 -0
  14. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/bundle_sequences_backend.py +0 -0
  15. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/check_agent_traces.py +0 -0
  16. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/check_column_mode.py +0 -0
  17. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/exit_terminal_leads_from_sequences.py +0 -0
  18. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/fetch_lead.py +0 -0
  19. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/fix_lead_titles_from_csv.py +0 -0
  20. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250106_add_column_position.py +0 -0
  21. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250108_fix_legacy_column_fields.py +0 -0
  22. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250109_add_user_fields_to_tables.py +0 -0
  23. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250117_add_call_logs_webhook_indexes.py +0 -0
  24. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250117_rename_call_logs_collection.py +0 -0
  25. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250119_create_leads_unique_email_index.py +0 -0
  26. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250123_add_filter_indexes.py +0 -0
  27. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250123_add_llm_responses_collection.py +0 -0
  28. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250128_migrate_user_ids_to_objectid.py +0 -0
  29. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250208_backfill_task_research_values.py +0 -0
  30. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250604_add_origin_indexes.py +0 -0
  31. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250608_cleanup_agent_metadata.py +0 -0
  32. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250608_rename_agent_metadata_to_metadata.py +0 -0
  33. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250922_add_activity_indexes.py +0 -0
  34. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250926_migrate_single_to_arrays.py +0 -0
  35. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250928_add_missing_timestamp_fields.py +0 -0
  36. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250929_add_task_join_indexes.py +0 -0
  37. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250929_add_task_join_indexes_safe.py +0 -0
  38. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20250929_create_shared_phone_cache.py +0 -0
  39. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20251007_add_rows_position_id_index.py +0 -0
  40. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20251109_add_ttl_for_llm_and_preview_traces.py +0 -0
  41. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20260113_normalize_table_filter_operators.py +0 -0
  42. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20260113_set_user_permissions_user_admin.py +0 -0
  43. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/20260204_sync_lead_owner_from_tasks.py +0 -0
  44. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/migrate_org_user_credits.py +0 -0
  45. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/set_default_lead_status.py +0 -0
  46. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/migrations/update_lead_owner_from_tasks.py +0 -0
  47. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/reassign_sequence_owner.py +0 -0
  48. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/run_sidecar_orchestrator_demo.py +0 -0
  49. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/test_crm_company_policy.py +0 -0
  50. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/test_sequences_instantly_e2e.py +0 -0
  51. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/test_sequences_personal_e2e.py +0 -0
  52. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/scripts/test_task_error_logger.py +0 -0
  53. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/setup.cfg +0 -0
  54. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/tests/test_contactout_custom.py +0 -0
  55. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/tests/test_contactout_integration.py +0 -0
  56. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/tests/test_contactout_multi_titles.py +0 -0
  57. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/tests/test_contactout_pipeline.py +0 -0
  58. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/tests/test_contactout_simple.py +0 -0
  59. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/tests/test_contactout_v2_bulk.py +0 -0
  60. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/tests/test_lead_required_fields.py +0 -0
  61. {autotouch_cli-0.2.7 → autotouch_cli-0.2.9}/tests/test_phone_provider_pipeline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: autotouch-cli
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: Autotouch Smart Table CLI
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
@@ -218,7 +218,7 @@ autotouch jobs get --job-id <JOB_ID>
218
218
 
219
219
  ## Safe run patterns (`firstN` + `--unprocessed-only`)
220
220
 
221
- Use this pattern to avoid paying twice for the same top rows.
221
+ Use this pattern for progressive rollouts.
222
222
 
223
223
  ```bash
224
224
  # Pilot first 10 rows
@@ -244,9 +244,33 @@ autotouch columns run \
244
244
 
245
245
  Notes:
246
246
  - `firstN` without `--unprocessed-only` can re-run already-processed rows.
247
+ - With `--unprocessed-only`, `firstN` means "first N currently eligible unprocessed rows", not "exactly N new rows since your last check".
248
+ - If you need an exact count (for example exactly 5 rows), use `run-next` below.
247
249
  - `--wait` polls `/api/bulk-jobs/{job_id}` until terminal status.
248
250
  - If a job stays `queued`, workers for that provider queue may be scaled to `0`.
249
251
 
252
+ ## Exact count runs (`run-next`)
253
+
254
+ Use this when you need exactly `N` rows in one run.
255
+ The CLI selects candidate row IDs first, then executes `/run` with `scope=subset`.
256
+
257
+ ```bash
258
+ # Run exactly 5 unprocessed rows from the current view
259
+ autotouch columns run-next \
260
+ --table-id <TABLE_ID> \
261
+ --column-id <COLUMN_ID> \
262
+ --count 5 \
263
+ --filters-file filters.json \
264
+ --show-estimate \
265
+ --wait
266
+ ```
267
+
268
+ Notes:
269
+ - Default behavior is unprocessed-only selection.
270
+ - Add `--include-processed` to allow already-processed rows into candidate selection.
271
+ - `run-next` is deterministic on count (subject to available eligible rows).
272
+ - If fewer than `N` eligible rows exist, it runs the available subset and reports selected count.
273
+
250
274
  ### Agent execution contract (strict)
251
275
 
252
276
  When operating this CLI as an agent, use backend job state as source of truth:
@@ -414,6 +438,28 @@ autotouch columns run \
414
438
  --show-estimate --wait
415
439
  ```
416
440
 
441
+ ### Cost tip: filter out empty rows between enrichments
442
+
443
+ Most teams run paid enrichments only on rows that already have required upstream data.
444
+ This avoids spending credits on rows that cannot produce useful results yet.
445
+
446
+ Example: run email finder only when `linkedin_url` exists and `work_email_address` is still empty.
447
+
448
+ ```json
449
+ {
450
+ "mode": "and",
451
+ "filters": [
452
+ { "columnKey": "linkedin_url", "operator": "isNotEmpty" },
453
+ { "columnKey": "work_email_address", "operator": "isEmpty" }
454
+ ]
455
+ }
456
+ ```
457
+
458
+ Pattern to reuse:
459
+ - Step 1: create/select a filter that excludes empty prerequisite fields.
460
+ - Step 2: run small (`firstN` or `run-next`) with `--show-estimate`.
461
+ - Step 3: expand only after output quality looks good.
462
+
417
463
  ## Auto-run configuration
418
464
 
419
465
  Auto-run is set on the column definition (`autoRun`) and can be changed later with `columns update`.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: autotouch-cli
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: Autotouch Smart Table CLI
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
@@ -218,7 +218,7 @@ autotouch jobs get --job-id <JOB_ID>
218
218
 
219
219
  ## Safe run patterns (`firstN` + `--unprocessed-only`)
220
220
 
221
- Use this pattern to avoid paying twice for the same top rows.
221
+ Use this pattern for progressive rollouts.
222
222
 
223
223
  ```bash
224
224
  # Pilot first 10 rows
@@ -244,9 +244,33 @@ autotouch columns run \
244
244
 
245
245
  Notes:
246
246
  - `firstN` without `--unprocessed-only` can re-run already-processed rows.
247
+ - With `--unprocessed-only`, `firstN` means "first N currently eligible unprocessed rows", not "exactly N new rows since your last check".
248
+ - If you need an exact count (for example exactly 5 rows), use `run-next` below.
247
249
  - `--wait` polls `/api/bulk-jobs/{job_id}` until terminal status.
248
250
  - If a job stays `queued`, workers for that provider queue may be scaled to `0`.
249
251
 
252
+ ## Exact count runs (`run-next`)
253
+
254
+ Use this when you need exactly `N` rows in one run.
255
+ The CLI selects candidate row IDs first, then executes `/run` with `scope=subset`.
256
+
257
+ ```bash
258
+ # Run exactly 5 unprocessed rows from the current view
259
+ autotouch columns run-next \
260
+ --table-id <TABLE_ID> \
261
+ --column-id <COLUMN_ID> \
262
+ --count 5 \
263
+ --filters-file filters.json \
264
+ --show-estimate \
265
+ --wait
266
+ ```
267
+
268
+ Notes:
269
+ - Default behavior is unprocessed-only selection.
270
+ - Add `--include-processed` to allow already-processed rows into candidate selection.
271
+ - `run-next` is deterministic on count (subject to available eligible rows).
272
+ - If fewer than `N` eligible rows exist, it runs the available subset and reports selected count.
273
+
250
274
  ### Agent execution contract (strict)
251
275
 
252
276
  When operating this CLI as an agent, use backend job state as source of truth:
@@ -414,6 +438,28 @@ autotouch columns run \
414
438
  --show-estimate --wait
415
439
  ```
416
440
 
441
+ ### Cost tip: filter out empty rows between enrichments
442
+
443
+ Most teams run paid enrichments only on rows that already have required upstream data.
444
+ This avoids spending credits on rows that cannot produce useful results yet.
445
+
446
+ Example: run email finder only when `linkedin_url` exists and `work_email_address` is still empty.
447
+
448
+ ```json
449
+ {
450
+ "mode": "and",
451
+ "filters": [
452
+ { "columnKey": "linkedin_url", "operator": "isNotEmpty" },
453
+ { "columnKey": "work_email_address", "operator": "isEmpty" }
454
+ ]
455
+ }
456
+ ```
457
+
458
+ Pattern to reuse:
459
+ - Step 1: create/select a filter that excludes empty prerequisite fields.
460
+ - Step 2: run small (`firstN` or `run-next`) with `--show-estimate`.
461
+ - Step 3: expand only after output quality looks good.
462
+
417
463
  ## Auto-run configuration
418
464
 
419
465
  Auto-run is set on the column definition (`autoRun`) and can be changed later with `columns update`.
@@ -209,7 +209,7 @@ autotouch jobs get --job-id <JOB_ID>
209
209
 
210
210
  ## Safe run patterns (`firstN` + `--unprocessed-only`)
211
211
 
212
- Use this pattern to avoid paying twice for the same top rows.
212
+ Use this pattern for progressive rollouts.
213
213
 
214
214
  ```bash
215
215
  # Pilot first 10 rows
@@ -235,9 +235,33 @@ autotouch columns run \
235
235
 
236
236
  Notes:
237
237
  - `firstN` without `--unprocessed-only` can re-run already-processed rows.
238
+ - With `--unprocessed-only`, `firstN` means "first N currently eligible unprocessed rows", not "exactly N new rows since your last check".
239
+ - If you need an exact count (for example exactly 5 rows), use `run-next` below.
238
240
  - `--wait` polls `/api/bulk-jobs/{job_id}` until terminal status.
239
241
  - If a job stays `queued`, workers for that provider queue may be scaled to `0`.
240
242
 
243
+ ## Exact count runs (`run-next`)
244
+
245
+ Use this when you need exactly `N` rows in one run.
246
+ The CLI selects candidate row IDs first, then executes `/run` with `scope=subset`.
247
+
248
+ ```bash
249
+ # Run exactly 5 unprocessed rows from the current view
250
+ autotouch columns run-next \
251
+ --table-id <TABLE_ID> \
252
+ --column-id <COLUMN_ID> \
253
+ --count 5 \
254
+ --filters-file filters.json \
255
+ --show-estimate \
256
+ --wait
257
+ ```
258
+
259
+ Notes:
260
+ - Default behavior is unprocessed-only selection.
261
+ - Add `--include-processed` to allow already-processed rows into candidate selection.
262
+ - `run-next` is deterministic on count (subject to available eligible rows).
263
+ - If fewer than `N` eligible rows exist, it runs the available subset and reports selected count.
264
+
241
265
  ### Agent execution contract (strict)
242
266
 
243
267
  When operating this CLI as an agent, use backend job state as source of truth:
@@ -405,6 +429,28 @@ autotouch columns run \
405
429
  --show-estimate --wait
406
430
  ```
407
431
 
432
+ ### Cost tip: filter out empty rows between enrichments
433
+
434
+ Most teams run paid enrichments only on rows that already have required upstream data.
435
+ This avoids spending credits on rows that cannot produce useful results yet.
436
+
437
+ Example: run email finder only when `linkedin_url` exists and `work_email_address` is still empty.
438
+
439
+ ```json
440
+ {
441
+ "mode": "and",
442
+ "filters": [
443
+ { "columnKey": "linkedin_url", "operator": "isNotEmpty" },
444
+ { "columnKey": "work_email_address", "operator": "isEmpty" }
445
+ ]
446
+ }
447
+ ```
448
+
449
+ Pattern to reuse:
450
+ - Step 1: create/select a filter that excludes empty prerequisite fields.
451
+ - Step 2: run small (`firstN` or `run-next`) with `--show-estimate`.
452
+ - Step 3: expand only after output quality looks good.
453
+
408
454
  ## Auto-run configuration
409
455
 
410
456
  Auto-run is set on the column definition (`autoRun`) and can be changed later with `columns update`.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "autotouch-cli"
7
- version = "0.2.7"
7
+ version = "0.2.9"
8
8
  description = "Autotouch Smart Table CLI"
9
9
  readme = "docs/research-table/reference/autotouch-cli.md"
10
10
  requires-python = ">=3.9"
@@ -604,6 +604,304 @@ def _normalize_run_payload(args: argparse.Namespace) -> Dict[str, Any]:
604
604
  return payload
605
605
 
606
606
 
607
+ def _resolve_column_key(
608
+ *,
609
+ table_id: str,
610
+ column_id: str,
611
+ base_url: str,
612
+ token: str,
613
+ use_x_api_key: bool,
614
+ timeout: int,
615
+ verbose: bool,
616
+ ) -> str:
617
+ columns_raw = _request_api(
618
+ "GET",
619
+ f"/api/tables/{table_id}/columns",
620
+ base_url=base_url,
621
+ token=token,
622
+ use_x_api_key=use_x_api_key,
623
+ timeout=timeout,
624
+ verbose=verbose,
625
+ )
626
+ if isinstance(columns_raw, list):
627
+ columns = columns_raw
628
+ elif isinstance(columns_raw, dict):
629
+ columns = columns_raw.get("columns") or columns_raw.get("items") or columns_raw.get("data") or []
630
+ else:
631
+ columns = []
632
+
633
+ target = str(column_id)
634
+ for col in columns:
635
+ if not isinstance(col, dict):
636
+ continue
637
+ cid = str(col.get("id") or col.get("_id") or "")
638
+ if cid != target:
639
+ continue
640
+ key = str(col.get("key") or "").strip()
641
+ if key:
642
+ return key
643
+ break
644
+
645
+ print(f"ERROR: failed to resolve column key for column_id={column_id}", file=sys.stderr)
646
+ sys.exit(1)
647
+
648
+
649
+ def _is_processed_cell_value(value: Any) -> bool:
650
+ if value is None:
651
+ return False
652
+ if isinstance(value, str):
653
+ return value.strip() != ""
654
+ if isinstance(value, (list, tuple, set, dict)):
655
+ return len(value) > 0
656
+ return True
657
+
658
+
659
+ def _select_next_row_ids(
660
+ *,
661
+ table_id: str,
662
+ column_id: str,
663
+ count: int,
664
+ filters: Optional[Dict[str, Any]],
665
+ unprocessed_only: bool,
666
+ page_size: int,
667
+ base_url: str,
668
+ token: str,
669
+ use_x_api_key: bool,
670
+ timeout: int,
671
+ verbose: bool,
672
+ ) -> Dict[str, Any]:
673
+ if count <= 0:
674
+ return {"row_ids": [], "requested": 0, "selected": 0, "scanned_rows": 0}
675
+
676
+ column_key = _resolve_column_key(
677
+ table_id=table_id,
678
+ column_id=column_id,
679
+ base_url=base_url,
680
+ token=token,
681
+ use_x_api_key=use_x_api_key,
682
+ timeout=timeout,
683
+ verbose=verbose,
684
+ )
685
+
686
+ selected: List[str] = []
687
+ seen: set[str] = set()
688
+ scanned_rows = 0
689
+ page_count = 0
690
+ cursor: Optional[str] = None
691
+
692
+ effective_page_size = max(1, min(int(page_size or 200), 1000))
693
+ filters_payload = filters if isinstance(filters, dict) else None
694
+
695
+ while len(selected) < count:
696
+ page_count += 1
697
+ params: Dict[str, Any] = {"page_size": effective_page_size}
698
+ if cursor:
699
+ params["cursor"] = cursor
700
+ if filters_payload:
701
+ params["filters"] = json.dumps(filters_payload, separators=(",", ":"))
702
+
703
+ page = _request_api(
704
+ "GET",
705
+ f"/api/tables/{table_id}/rows",
706
+ base_url=base_url,
707
+ token=token,
708
+ use_x_api_key=use_x_api_key,
709
+ params=params,
710
+ timeout=timeout,
711
+ verbose=verbose,
712
+ )
713
+ if not isinstance(page, dict):
714
+ print(f"ERROR: unexpected rows response: {page}", file=sys.stderr)
715
+ sys.exit(1)
716
+
717
+ rows = page.get("rows") or []
718
+ if not isinstance(rows, list):
719
+ print(f"ERROR: rows payload is not a list: {type(rows).__name__}", file=sys.stderr)
720
+ sys.exit(1)
721
+
722
+ for row in rows:
723
+ if not isinstance(row, dict):
724
+ continue
725
+ scanned_rows += 1
726
+ row_id = str(row.get("_id") or row.get("id") or row.get("rowId") or "").strip()
727
+ if not row_id or row_id in seen:
728
+ continue
729
+ seen.add(row_id)
730
+
731
+ if unprocessed_only and _is_processed_cell_value(row.get(column_key)):
732
+ continue
733
+
734
+ selected.append(row_id)
735
+ if len(selected) >= count:
736
+ break
737
+
738
+ has_more = bool(page.get("hasMore") if "hasMore" in page else page.get("has_more"))
739
+ next_cursor = page.get("nextCursor") if page.get("nextCursor") is not None else page.get("next_cursor")
740
+ if len(selected) >= count:
741
+ break
742
+ if not has_more or not next_cursor:
743
+ break
744
+ cursor = str(next_cursor)
745
+
746
+ return {
747
+ "row_ids": selected,
748
+ "requested": int(count),
749
+ "selected": len(selected),
750
+ "scanned_rows": scanned_rows,
751
+ "pages_scanned": page_count,
752
+ "column_key": column_key,
753
+ "used_filters": bool(filters_payload),
754
+ "unprocessed_only": bool(unprocessed_only),
755
+ }
756
+
757
+
758
+ def _execute_run_flow(
759
+ *,
760
+ args: argparse.Namespace,
761
+ token: str,
762
+ payload: Dict[str, Any],
763
+ context: Optional[Dict[str, Any]] = None,
764
+ ) -> None:
765
+ estimate_data: Optional[Dict[str, Any]] = None
766
+ should_estimate = bool(args.show_estimate or args.max_credits is not None or args.dry_run)
767
+ if should_estimate:
768
+ estimate_raw = _request_api(
769
+ "POST",
770
+ f"/api/tables/{args.table_id}/columns/{args.column_id}/estimate",
771
+ base_url=args.base_url,
772
+ token=token,
773
+ use_x_api_key=args.use_x_api_key,
774
+ payload=payload,
775
+ timeout=args.timeout,
776
+ verbose=args.verbose,
777
+ )
778
+ if not isinstance(estimate_raw, dict):
779
+ print(f"ERROR: unexpected estimate response: {estimate_raw}", file=sys.stderr)
780
+ sys.exit(1)
781
+ estimate_data = estimate_raw
782
+
783
+ if args.max_credits is not None:
784
+ if estimate_data is None:
785
+ print("ERROR: failed to compute estimate for --max-credits guard", file=sys.stderr)
786
+ sys.exit(1)
787
+ limit = float(args.max_credits)
788
+ estimated_max = estimate_data.get("estimated_credits_max")
789
+ estimated_min = float(estimate_data.get("estimated_credits_min") or 0.0)
790
+
791
+ if estimated_max is None and not bool(args.allow_unknown_max):
792
+ output = {
793
+ "blocked": True,
794
+ "reason": "estimated_credits_max is unknown; pass --allow-unknown-max to proceed",
795
+ "max_credits_limit": limit,
796
+ "estimate": estimate_data,
797
+ }
798
+ if context is not None:
799
+ output["context"] = context
800
+ _print_json(output, compact=args.compact)
801
+ sys.exit(3)
802
+
803
+ compare_value = float(estimated_max if estimated_max is not None else estimated_min)
804
+ if compare_value > limit:
805
+ output = {
806
+ "blocked": True,
807
+ "reason": "estimated credits exceed max-credits limit",
808
+ "max_credits_limit": limit,
809
+ "estimate_compare_value": compare_value,
810
+ "estimate": estimate_data,
811
+ }
812
+ if context is not None:
813
+ output["context"] = context
814
+ _print_json(output, compact=args.compact)
815
+ sys.exit(3)
816
+
817
+ if args.dry_run:
818
+ output = {
819
+ "dry_run": True,
820
+ "run_payload": payload,
821
+ "estimate": estimate_data,
822
+ }
823
+ if context is not None:
824
+ output["context"] = context
825
+ _print_json(output, compact=args.compact)
826
+ return
827
+
828
+ run_data = _request_api(
829
+ "POST",
830
+ f"/api/tables/{args.table_id}/columns/{args.column_id}/run",
831
+ base_url=args.base_url,
832
+ token=token,
833
+ use_x_api_key=args.use_x_api_key,
834
+ payload=payload,
835
+ timeout=args.timeout,
836
+ verbose=args.verbose,
837
+ )
838
+ if not isinstance(run_data, dict):
839
+ output_non_dict: Any = run_data
840
+ if context is not None:
841
+ output_non_dict = {"context": context, "run": run_data}
842
+ _print_json(output_non_dict, compact=args.compact)
843
+ return
844
+
845
+ if args.wait:
846
+ job_id = run_data.get("job_id") or run_data.get("jobId")
847
+ if not job_id:
848
+ print("ERROR: run response missing job_id; cannot wait", file=sys.stderr)
849
+ output = run_data if context is None else {"context": context, "run": run_data}
850
+ _print_json(output, compact=args.compact)
851
+ sys.exit(1)
852
+ if not args.quiet_wait:
853
+ _print_json(
854
+ {
855
+ "job_id": str(job_id),
856
+ "status": "polling_started",
857
+ "hint": "polling /api/bulk-jobs/{job_id}",
858
+ },
859
+ compact=args.compact,
860
+ )
861
+
862
+ poll_result = _poll_job(
863
+ job_id=str(job_id),
864
+ base_url=args.base_url,
865
+ token=token,
866
+ use_x_api_key=args.use_x_api_key,
867
+ interval_seconds=int(args.poll_interval or 2),
868
+ wait_timeout_seconds=int(args.wait_timeout or 0),
869
+ request_timeout_seconds=int(args.timeout or DEFAULT_TIMEOUT_SECONDS),
870
+ verbose=args.verbose,
871
+ compact=args.compact,
872
+ once=False,
873
+ print_updates=not args.quiet_wait,
874
+ )
875
+ final_job = poll_result.get("job") or {}
876
+ timed_out = bool(poll_result.get("timed_out"))
877
+ output = {
878
+ "run": run_data,
879
+ "estimate": estimate_data if args.show_estimate or args.max_credits is not None else None,
880
+ "final_job": final_job,
881
+ "timed_out": timed_out,
882
+ "polls": int(poll_result.get("polls") or 0),
883
+ }
884
+ if context is not None:
885
+ output["context"] = context
886
+ _print_json(output, compact=args.compact)
887
+
888
+ if timed_out:
889
+ sys.exit(4)
890
+ final_status = str((final_job or {}).get("status") or "").lower()
891
+ if args.fail_on_error and final_status in {"error", "cancelled"}:
892
+ sys.exit(1)
893
+ if args.fail_on_partial and final_status == "partial":
894
+ sys.exit(1)
895
+ return
896
+
897
+ output_any: Any = run_data
898
+ if estimate_data is not None and args.show_estimate:
899
+ output_any = {"estimate": estimate_data, "run": run_data}
900
+ if context is not None:
901
+ output_any = {"context": context, "result": output_any}
902
+ _print_json(output_any, compact=args.compact)
903
+
904
+
607
905
  def _create_rows_and_patch_records(
608
906
  *,
609
907
  table_id: str,
@@ -1720,132 +2018,61 @@ def cmd_columns_projections(args: argparse.Namespace) -> None:
1720
2018
  def cmd_columns_run(args: argparse.Namespace) -> None:
1721
2019
  token = _resolve_token(args.token, required=True)
1722
2020
  payload = _normalize_run_payload(args)
1723
- estimate_data: Optional[Dict[str, Any]] = None
1724
- should_estimate = bool(args.show_estimate or args.max_credits is not None or args.dry_run)
1725
- if should_estimate:
1726
- estimate_raw = _request_api(
1727
- "POST",
1728
- f"/api/tables/{args.table_id}/columns/{args.column_id}/estimate",
1729
- base_url=args.base_url,
1730
- token=token,
1731
- use_x_api_key=args.use_x_api_key,
1732
- payload=payload,
1733
- timeout=args.timeout,
1734
- verbose=args.verbose,
1735
- )
1736
- if not isinstance(estimate_raw, dict):
1737
- print(f"ERROR: unexpected estimate response: {estimate_raw}", file=sys.stderr)
1738
- sys.exit(1)
1739
- estimate_data = estimate_raw
2021
+ _execute_run_flow(args=args, token=token, payload=payload, context=None)
1740
2022
 
1741
- if args.max_credits is not None:
1742
- if estimate_data is None:
1743
- print("ERROR: failed to compute estimate for --max-credits guard", file=sys.stderr)
1744
- sys.exit(1)
1745
- limit = float(args.max_credits)
1746
- estimated_max = estimate_data.get("estimated_credits_max")
1747
- estimated_min = float(estimate_data.get("estimated_credits_min") or 0.0)
1748
2023
 
1749
- if estimated_max is None and not bool(args.allow_unknown_max):
1750
- output = {
1751
- "blocked": True,
1752
- "reason": "estimated_credits_max is unknown; pass --allow-unknown-max to proceed",
1753
- "max_credits_limit": limit,
1754
- "estimate": estimate_data,
1755
- }
1756
- _print_json(output, compact=args.compact)
1757
- sys.exit(3)
1758
-
1759
- compare_value = float(estimated_max if estimated_max is not None else estimated_min)
1760
- if compare_value > limit:
1761
- output = {
1762
- "blocked": True,
1763
- "reason": "estimated credits exceed max-credits limit",
1764
- "max_credits_limit": limit,
1765
- "estimate_compare_value": compare_value,
1766
- "estimate": estimate_data,
1767
- }
1768
- _print_json(output, compact=args.compact)
1769
- sys.exit(3)
2024
+ def cmd_columns_run_next(args: argparse.Namespace) -> None:
2025
+ token = _resolve_token(args.token, required=True)
2026
+ requested_count = int(args.count or 0)
2027
+ if requested_count <= 0:
2028
+ print("ERROR: --count must be > 0", file=sys.stderr)
2029
+ sys.exit(2)
1770
2030
 
1771
- if args.dry_run:
1772
- _print_json(
1773
- {
1774
- "dry_run": True,
1775
- "run_payload": payload,
1776
- "estimate": estimate_data,
1777
- },
1778
- compact=args.compact,
1779
- )
1780
- return
2031
+ filters = _load_json_input(
2032
+ inline_json=getattr(args, "filters_json", None),
2033
+ file_path=getattr(args, "filters_file", None),
2034
+ context="filters",
2035
+ default=None,
2036
+ )
2037
+ if filters is not None and not isinstance(filters, dict):
2038
+ print("ERROR: filters payload must be a JSON object", file=sys.stderr)
2039
+ sys.exit(2)
1781
2040
 
1782
- run_data = _request_api(
1783
- "POST",
1784
- f"/api/tables/{args.table_id}/columns/{args.column_id}/run",
2041
+ selection = _select_next_row_ids(
2042
+ table_id=args.table_id,
2043
+ column_id=args.column_id,
2044
+ count=requested_count,
2045
+ filters=filters,
2046
+ unprocessed_only=bool(args.unprocessed_only),
2047
+ page_size=int(args.page_size or 200),
1785
2048
  base_url=args.base_url,
1786
2049
  token=token,
1787
2050
  use_x_api_key=args.use_x_api_key,
1788
- payload=payload,
1789
2051
  timeout=args.timeout,
1790
2052
  verbose=args.verbose,
1791
2053
  )
1792
- if not isinstance(run_data, dict):
1793
- _print_json(run_data, compact=args.compact)
1794
- return
1795
-
1796
- if args.wait:
1797
- job_id = run_data.get("job_id") or run_data.get("jobId")
1798
- if not job_id:
1799
- print("ERROR: run response missing job_id; cannot wait", file=sys.stderr)
1800
- _print_json(run_data, compact=args.compact)
1801
- sys.exit(1)
1802
- if not args.quiet_wait:
1803
- _print_json(
1804
- {
1805
- "job_id": str(job_id),
1806
- "status": "polling_started",
1807
- "hint": "polling /api/bulk-jobs/{job_id}",
1808
- },
1809
- compact=args.compact,
1810
- )
1811
-
1812
- poll_result = _poll_job(
1813
- job_id=str(job_id),
1814
- base_url=args.base_url,
1815
- token=token,
1816
- use_x_api_key=args.use_x_api_key,
1817
- interval_seconds=int(args.poll_interval or 2),
1818
- wait_timeout_seconds=int(args.wait_timeout or 0),
1819
- request_timeout_seconds=int(args.timeout or DEFAULT_TIMEOUT_SECONDS),
1820
- verbose=args.verbose,
1821
- compact=args.compact,
1822
- once=False,
1823
- print_updates=not args.quiet_wait,
1824
- )
1825
- final_job = poll_result.get("job") or {}
1826
- timed_out = bool(poll_result.get("timed_out"))
2054
+ row_ids = selection.get("row_ids") or []
2055
+ if not row_ids:
1827
2056
  output = {
1828
- "run": run_data,
1829
- "estimate": estimate_data if args.show_estimate or args.max_credits is not None else None,
1830
- "final_job": final_job,
1831
- "timed_out": timed_out,
1832
- "polls": int(poll_result.get("polls") or 0),
2057
+ "queued": False,
2058
+ "reason": "no eligible rows found for run-next selection",
2059
+ "selection": selection,
1833
2060
  }
1834
2061
  _print_json(output, compact=args.compact)
1835
-
1836
- if timed_out:
1837
- sys.exit(4)
1838
- final_status = str((final_job or {}).get("status") or "").lower()
1839
- if args.fail_on_error and final_status in {"error", "cancelled"}:
1840
- sys.exit(1)
1841
- if args.fail_on_partial and final_status == "partial":
1842
- sys.exit(1)
2062
+ if args.fail_if_empty:
2063
+ sys.exit(3)
1843
2064
  return
1844
2065
 
1845
- output: Any = run_data
1846
- if estimate_data is not None and args.show_estimate:
1847
- output = {"estimate": estimate_data, "run": run_data}
1848
- _print_json(output, compact=args.compact)
2066
+ payload: Dict[str, Any] = {"scope": "subset", "rowIds": row_ids}
2067
+ if args.unprocessed_only:
2068
+ payload["unprocessedOnly"] = True
2069
+
2070
+ context = {
2071
+ "mode": "run-next",
2072
+ "selection": {k: v for k, v in selection.items() if k != "row_ids"},
2073
+ "row_ids": row_ids,
2074
+ }
2075
+ _execute_run_flow(args=args, token=token, payload=payload, context=context)
1849
2076
 
1850
2077
 
1851
2078
  def cmd_columns_estimate(args: argparse.Namespace) -> None:
@@ -2272,6 +2499,25 @@ def build_parser() -> argparse.ArgumentParser:
2272
2499
  _add_api_common_arguments(pcr)
2273
2500
  pcr.set_defaults(func=cmd_columns_run)
2274
2501
 
2502
+ pcrn = col_sub.add_parser("run-next", help="Run exactly N selected rows using subset scope")
2503
+ pcrn.add_argument("--table-id", required=True)
2504
+ pcrn.add_argument("--column-id", required=True)
2505
+ pcrn.add_argument("--count", type=int, required=True, help="Exact number of rows to queue")
2506
+ pcrn.add_argument("--filters-json", help="Optional JSON object to select from filtered rows")
2507
+ pcrn.add_argument("--filters-file", help="Optional JSON file to select from filtered rows")
2508
+ pcrn.add_argument("--page-size", type=int, default=200, help="Rows page size while selecting candidates (max 1000)")
2509
+ pcrn.add_argument(
2510
+ "--include-processed",
2511
+ dest="unprocessed_only",
2512
+ action="store_false",
2513
+ help="Include rows that already have output in candidate selection",
2514
+ )
2515
+ pcrn.add_argument("--fail-if-empty", action="store_true", help="Exit non-zero when no eligible rows are selected")
2516
+ pcrn.set_defaults(unprocessed_only=True)
2517
+ _add_run_execution_arguments(pcrn)
2518
+ _add_api_common_arguments(pcrn)
2519
+ pcrn.set_defaults(func=cmd_columns_run_next)
2520
+
2275
2521
  pce = col_sub.add_parser("estimate", help="Estimate a column run")
2276
2522
  pce.add_argument("--table-id", required=True)
2277
2523
  pce.add_argument("--column-id", required=True)
@@ -2340,6 +2586,25 @@ def build_parser() -> argparse.ArgumentParser:
2340
2586
  _add_api_common_arguments(palias_run)
2341
2587
  palias_run.set_defaults(func=cmd_columns_run)
2342
2588
 
2589
+ palias_run_next = sub.add_parser("run-next", help="Alias for: columns run-next")
2590
+ palias_run_next.add_argument("--table-id", required=True)
2591
+ palias_run_next.add_argument("--column-id", required=True)
2592
+ palias_run_next.add_argument("--count", type=int, required=True, help="Exact number of rows to queue")
2593
+ palias_run_next.add_argument("--filters-json", help="Optional JSON object to select from filtered rows")
2594
+ palias_run_next.add_argument("--filters-file", help="Optional JSON file to select from filtered rows")
2595
+ palias_run_next.add_argument("--page-size", type=int, default=200, help="Rows page size while selecting candidates (max 1000)")
2596
+ palias_run_next.add_argument(
2597
+ "--include-processed",
2598
+ dest="unprocessed_only",
2599
+ action="store_false",
2600
+ help="Include rows that already have output in candidate selection",
2601
+ )
2602
+ palias_run_next.add_argument("--fail-if-empty", action="store_true", help="Exit non-zero when no eligible rows are selected")
2603
+ palias_run_next.set_defaults(unprocessed_only=True)
2604
+ _add_run_execution_arguments(palias_run_next)
2605
+ _add_api_common_arguments(palias_run_next)
2606
+ palias_run_next.set_defaults(func=cmd_columns_run_next)
2607
+
2343
2608
  # status
2344
2609
  ps = sub.add_parser("status", help="Show pending/done/error counts for a table/column (Mongo)")
2345
2610
  ps.add_argument("--table-id", required=True)
File without changes