autotouch-cli 0.2.6__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/PKG-INFO +67 -2
  2. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/autotouch_cli.egg-info/PKG-INFO +67 -2
  3. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/docs/research-table/reference/autotouch-cli.md +66 -1
  4. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/pyproject.toml +1 -1
  5. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/smart_table_cli.py +376 -111
  6. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/autotouch_cli.egg-info/SOURCES.txt +0 -0
  7. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/autotouch_cli.egg-info/dependency_links.txt +0 -0
  8. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/autotouch_cli.egg-info/entry_points.txt +0 -0
  9. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/autotouch_cli.egg-info/requires.txt +0 -0
  10. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/autotouch_cli.egg-info/top_level.txt +0 -0
  11. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/__init__.py +0 -0
  12. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/add_column_unique_index.py +0 -0
  13. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/attach_csv_import_leads_to_research_table.py +0 -0
  14. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/bundle_sequences_backend.py +0 -0
  15. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/check_agent_traces.py +0 -0
  16. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/check_column_mode.py +0 -0
  17. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/exit_terminal_leads_from_sequences.py +0 -0
  18. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/fetch_lead.py +0 -0
  19. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/fix_lead_titles_from_csv.py +0 -0
  20. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250106_add_column_position.py +0 -0
  21. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250108_fix_legacy_column_fields.py +0 -0
  22. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250109_add_user_fields_to_tables.py +0 -0
  23. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250117_add_call_logs_webhook_indexes.py +0 -0
  24. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250117_rename_call_logs_collection.py +0 -0
  25. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250119_create_leads_unique_email_index.py +0 -0
  26. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250123_add_filter_indexes.py +0 -0
  27. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250123_add_llm_responses_collection.py +0 -0
  28. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250128_migrate_user_ids_to_objectid.py +0 -0
  29. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250208_backfill_task_research_values.py +0 -0
  30. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250604_add_origin_indexes.py +0 -0
  31. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250608_cleanup_agent_metadata.py +0 -0
  32. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250608_rename_agent_metadata_to_metadata.py +0 -0
  33. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250922_add_activity_indexes.py +0 -0
  34. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250926_migrate_single_to_arrays.py +0 -0
  35. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250928_add_missing_timestamp_fields.py +0 -0
  36. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250929_add_task_join_indexes.py +0 -0
  37. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250929_add_task_join_indexes_safe.py +0 -0
  38. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20250929_create_shared_phone_cache.py +0 -0
  39. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20251007_add_rows_position_id_index.py +0 -0
  40. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20251109_add_ttl_for_llm_and_preview_traces.py +0 -0
  41. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20260113_normalize_table_filter_operators.py +0 -0
  42. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20260113_set_user_permissions_user_admin.py +0 -0
  43. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/20260204_sync_lead_owner_from_tasks.py +0 -0
  44. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/migrate_org_user_credits.py +0 -0
  45. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/set_default_lead_status.py +0 -0
  46. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/migrations/update_lead_owner_from_tasks.py +0 -0
  47. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/reassign_sequence_owner.py +0 -0
  48. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/run_sidecar_orchestrator_demo.py +0 -0
  49. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/test_crm_company_policy.py +0 -0
  50. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/test_sequences_instantly_e2e.py +0 -0
  51. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/test_sequences_personal_e2e.py +0 -0
  52. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/scripts/test_task_error_logger.py +0 -0
  53. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/setup.cfg +0 -0
  54. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/tests/test_contactout_custom.py +0 -0
  55. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/tests/test_contactout_integration.py +0 -0
  56. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/tests/test_contactout_multi_titles.py +0 -0
  57. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/tests/test_contactout_pipeline.py +0 -0
  58. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/tests/test_contactout_simple.py +0 -0
  59. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/tests/test_contactout_v2_bulk.py +0 -0
  60. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/tests/test_lead_required_fields.py +0 -0
  61. {autotouch_cli-0.2.6 → autotouch_cli-0.2.8}/tests/test_phone_provider_pipeline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: autotouch-cli
3
- Version: 0.2.6
3
+ Version: 0.2.8
4
4
  Summary: Autotouch Smart Table CLI
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
@@ -218,7 +218,7 @@ autotouch jobs get --job-id <JOB_ID>
218
218
 
219
219
  ## Safe run patterns (`firstN` + `--unprocessed-only`)
220
220
 
221
- Use this pattern to avoid paying twice for the same top rows.
221
+ Use this pattern for progressive rollouts.
222
222
 
223
223
  ```bash
224
224
  # Pilot first 10 rows
@@ -244,9 +244,74 @@ autotouch columns run \
244
244
 
245
245
  Notes:
246
246
  - `firstN` without `--unprocessed-only` can re-run already-processed rows.
247
+ - With `--unprocessed-only`, `firstN` means "first N currently eligible unprocessed rows", not "exactly N new rows since your last check".
248
+ - If you need an exact count (for example exactly 5 rows), use `run-next` below.
247
249
  - `--wait` polls `/api/bulk-jobs/{job_id}` until terminal status.
248
250
  - If a job stays `queued`, workers for that provider queue may be scaled to `0`.
249
251
 
252
+ ## Exact count runs (`run-next`)
253
+
254
+ Use this when you need exactly `N` rows in one run.
255
+ The CLI selects candidate row IDs first, then executes `/run` with `scope=subset`.
256
+
257
+ ```bash
258
+ # Run exactly 5 unprocessed rows from the current view
259
+ autotouch columns run-next \
260
+ --table-id <TABLE_ID> \
261
+ --column-id <COLUMN_ID> \
262
+ --count 5 \
263
+ --filters-file filters.json \
264
+ --show-estimate \
265
+ --wait
266
+ ```
267
+
268
+ Notes:
269
+ - Default behavior is unprocessed-only selection.
270
+ - Add `--include-processed` to allow already-processed rows into candidate selection.
271
+ - `run-next` is deterministic on count (subject to available eligible rows).
272
+ - If fewer than `N` eligible rows exist, it runs the available subset and reports selected count.
273
+
274
+ ### Agent execution contract (strict)
275
+
276
+ When operating this CLI as an agent, use backend job state as source of truth:
277
+
278
+ 1. Treat a run as started only if `/run` returns a `jobId` (`job_id`).
279
+ 2. Treat a run as completed only when `GET /api/bulk-jobs/{job_id}` returns terminal status.
280
+ 3. Never infer progress/completion from local process liveness alone.
281
+ 4. If polling is blocked by local network/approval/sandbox constraints, report "run state not confirmed" (do not claim still running/completed).
282
+
283
+ Terminal status values:
284
+
285
+ - `completed`
286
+ - `partial`
287
+ - `error`
288
+ - `cancelled`
289
+
290
+ Non-terminal status values:
291
+
292
+ - `queued`
293
+ - `distributing`
294
+ - `processing`
295
+
296
+ ### Canonical fallback (when `--wait` is noisy in your runtime)
297
+
298
+ ```bash
299
+ # 1) Queue run and capture jobId
300
+ autotouch columns run \
301
+ --table-id <TABLE_ID> \
302
+ --column-id <COLUMN_ID> \
303
+ --scope firstN \
304
+ --first-n 15 \
305
+ --unprocessed-only \
306
+ --show-estimate \
307
+ --output json
308
+
309
+ # 2) Poll backend truth directly
310
+ autotouch jobs get --job-id <JOB_ID> --output json
311
+ ```
312
+
313
+ Repeat `jobs get` until status is terminal.
314
+
250
315
  ## CSV import (agent-safe, async-first)
251
316
 
252
317
  `rows import-csv` defaults to optimized import transport (`/import-optimized`) so large files do not fail on a single long request.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: autotouch-cli
3
- Version: 0.2.6
3
+ Version: 0.2.8
4
4
  Summary: Autotouch Smart Table CLI
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
@@ -218,7 +218,7 @@ autotouch jobs get --job-id <JOB_ID>
218
218
 
219
219
  ## Safe run patterns (`firstN` + `--unprocessed-only`)
220
220
 
221
- Use this pattern to avoid paying twice for the same top rows.
221
+ Use this pattern for progressive rollouts.
222
222
 
223
223
  ```bash
224
224
  # Pilot first 10 rows
@@ -244,9 +244,74 @@ autotouch columns run \
244
244
 
245
245
  Notes:
246
246
  - `firstN` without `--unprocessed-only` can re-run already-processed rows.
247
+ - With `--unprocessed-only`, `firstN` means "first N currently eligible unprocessed rows", not "exactly N new rows since your last check".
248
+ - If you need an exact count (for example exactly 5 rows), use `run-next` below.
247
249
  - `--wait` polls `/api/bulk-jobs/{job_id}` until terminal status.
248
250
  - If a job stays `queued`, workers for that provider queue may be scaled to `0`.
249
251
 
252
+ ## Exact count runs (`run-next`)
253
+
254
+ Use this when you need exactly `N` rows in one run.
255
+ The CLI selects candidate row IDs first, then executes `/run` with `scope=subset`.
256
+
257
+ ```bash
258
+ # Run exactly 5 unprocessed rows from the current view
259
+ autotouch columns run-next \
260
+ --table-id <TABLE_ID> \
261
+ --column-id <COLUMN_ID> \
262
+ --count 5 \
263
+ --filters-file filters.json \
264
+ --show-estimate \
265
+ --wait
266
+ ```
267
+
268
+ Notes:
269
+ - Default behavior is unprocessed-only selection.
270
+ - Add `--include-processed` to allow already-processed rows into candidate selection.
271
+ - `run-next` is deterministic on count (subject to available eligible rows).
272
+ - If fewer than `N` eligible rows exist, it runs the available subset and reports selected count.
273
+
274
+ ### Agent execution contract (strict)
275
+
276
+ When operating this CLI as an agent, use backend job state as source of truth:
277
+
278
+ 1. Treat a run as started only if `/run` returns a `jobId` (`job_id`).
279
+ 2. Treat a run as completed only when `GET /api/bulk-jobs/{job_id}` returns terminal status.
280
+ 3. Never infer progress/completion from local process liveness alone.
281
+ 4. If polling is blocked by local network/approval/sandbox constraints, report "run state not confirmed" (do not claim still running/completed).
282
+
283
+ Terminal status values:
284
+
285
+ - `completed`
286
+ - `partial`
287
+ - `error`
288
+ - `cancelled`
289
+
290
+ Non-terminal status values:
291
+
292
+ - `queued`
293
+ - `distributing`
294
+ - `processing`
295
+
296
+ ### Canonical fallback (when `--wait` is noisy in your runtime)
297
+
298
+ ```bash
299
+ # 1) Queue run and capture jobId
300
+ autotouch columns run \
301
+ --table-id <TABLE_ID> \
302
+ --column-id <COLUMN_ID> \
303
+ --scope firstN \
304
+ --first-n 15 \
305
+ --unprocessed-only \
306
+ --show-estimate \
307
+ --output json
308
+
309
+ # 2) Poll backend truth directly
310
+ autotouch jobs get --job-id <JOB_ID> --output json
311
+ ```
312
+
313
+ Repeat `jobs get` until status is terminal.
314
+
250
315
  ## CSV import (agent-safe, async-first)
251
316
 
252
317
  `rows import-csv` defaults to optimized import transport (`/import-optimized`) so large files do not fail on a single long request.
@@ -209,7 +209,7 @@ autotouch jobs get --job-id <JOB_ID>
209
209
 
210
210
  ## Safe run patterns (`firstN` + `--unprocessed-only`)
211
211
 
212
- Use this pattern to avoid paying twice for the same top rows.
212
+ Use this pattern for progressive rollouts.
213
213
 
214
214
  ```bash
215
215
  # Pilot first 10 rows
@@ -235,9 +235,74 @@ autotouch columns run \
235
235
 
236
236
  Notes:
237
237
  - `firstN` without `--unprocessed-only` can re-run already-processed rows.
238
+ - With `--unprocessed-only`, `firstN` means "first N currently eligible unprocessed rows", not "exactly N new rows since your last check".
239
+ - If you need an exact count (for example exactly 5 rows), use `run-next` below.
238
240
  - `--wait` polls `/api/bulk-jobs/{job_id}` until terminal status.
239
241
  - If a job stays `queued`, workers for that provider queue may be scaled to `0`.
240
242
 
243
+ ## Exact count runs (`run-next`)
244
+
245
+ Use this when you need exactly `N` rows in one run.
246
+ The CLI selects candidate row IDs first, then executes `/run` with `scope=subset`.
247
+
248
+ ```bash
249
+ # Run exactly 5 unprocessed rows from the current view
250
+ autotouch columns run-next \
251
+ --table-id <TABLE_ID> \
252
+ --column-id <COLUMN_ID> \
253
+ --count 5 \
254
+ --filters-file filters.json \
255
+ --show-estimate \
256
+ --wait
257
+ ```
258
+
259
+ Notes:
260
+ - Default behavior is unprocessed-only selection.
261
+ - Add `--include-processed` to allow already-processed rows into candidate selection.
262
+ - `run-next` is deterministic on count (subject to available eligible rows).
263
+ - If fewer than `N` eligible rows exist, it runs the available subset and reports selected count.
264
+
265
+ ### Agent execution contract (strict)
266
+
267
+ When operating this CLI as an agent, use backend job state as source of truth:
268
+
269
+ 1. Treat a run as started only if `/run` returns a `jobId` (`job_id`).
270
+ 2. Treat a run as completed only when `GET /api/bulk-jobs/{job_id}` returns terminal status.
271
+ 3. Never infer progress/completion from local process liveness alone.
272
+ 4. If polling is blocked by local network/approval/sandbox constraints, report "run state not confirmed" (do not claim still running/completed).
273
+
274
+ Terminal status values:
275
+
276
+ - `completed`
277
+ - `partial`
278
+ - `error`
279
+ - `cancelled`
280
+
281
+ Non-terminal status values:
282
+
283
+ - `queued`
284
+ - `distributing`
285
+ - `processing`
286
+
287
+ ### Canonical fallback (when `--wait` is noisy in your runtime)
288
+
289
+ ```bash
290
+ # 1) Queue run and capture jobId
291
+ autotouch columns run \
292
+ --table-id <TABLE_ID> \
293
+ --column-id <COLUMN_ID> \
294
+ --scope firstN \
295
+ --first-n 15 \
296
+ --unprocessed-only \
297
+ --show-estimate \
298
+ --output json
299
+
300
+ # 2) Poll backend truth directly
301
+ autotouch jobs get --job-id <JOB_ID> --output json
302
+ ```
303
+
304
+ Repeat `jobs get` until status is terminal.
305
+
241
306
  ## CSV import (agent-safe, async-first)
242
307
 
243
308
  `rows import-csv` defaults to optimized import transport (`/import-optimized`) so large files do not fail on a single long request.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "autotouch-cli"
7
- version = "0.2.6"
7
+ version = "0.2.8"
8
8
  description = "Autotouch Smart Table CLI"
9
9
  readme = "docs/research-table/reference/autotouch-cli.md"
10
10
  requires-python = ">=3.9"
@@ -604,6 +604,304 @@ def _normalize_run_payload(args: argparse.Namespace) -> Dict[str, Any]:
604
604
  return payload
605
605
 
606
606
 
607
+ def _resolve_column_key(
608
+ *,
609
+ table_id: str,
610
+ column_id: str,
611
+ base_url: str,
612
+ token: str,
613
+ use_x_api_key: bool,
614
+ timeout: int,
615
+ verbose: bool,
616
+ ) -> str:
617
+ columns_raw = _request_api(
618
+ "GET",
619
+ f"/api/tables/{table_id}/columns",
620
+ base_url=base_url,
621
+ token=token,
622
+ use_x_api_key=use_x_api_key,
623
+ timeout=timeout,
624
+ verbose=verbose,
625
+ )
626
+ if isinstance(columns_raw, list):
627
+ columns = columns_raw
628
+ elif isinstance(columns_raw, dict):
629
+ columns = columns_raw.get("columns") or columns_raw.get("items") or columns_raw.get("data") or []
630
+ else:
631
+ columns = []
632
+
633
+ target = str(column_id)
634
+ for col in columns:
635
+ if not isinstance(col, dict):
636
+ continue
637
+ cid = str(col.get("id") or col.get("_id") or "")
638
+ if cid != target:
639
+ continue
640
+ key = str(col.get("key") or "").strip()
641
+ if key:
642
+ return key
643
+ break
644
+
645
+ print(f"ERROR: failed to resolve column key for column_id={column_id}", file=sys.stderr)
646
+ sys.exit(1)
647
+
648
+
649
+ def _is_processed_cell_value(value: Any) -> bool:
650
+ if value is None:
651
+ return False
652
+ if isinstance(value, str):
653
+ return value.strip() != ""
654
+ if isinstance(value, (list, tuple, set, dict)):
655
+ return len(value) > 0
656
+ return True
657
+
658
+
659
+ def _select_next_row_ids(
660
+ *,
661
+ table_id: str,
662
+ column_id: str,
663
+ count: int,
664
+ filters: Optional[Dict[str, Any]],
665
+ unprocessed_only: bool,
666
+ page_size: int,
667
+ base_url: str,
668
+ token: str,
669
+ use_x_api_key: bool,
670
+ timeout: int,
671
+ verbose: bool,
672
+ ) -> Dict[str, Any]:
673
+ if count <= 0:
674
+ return {"row_ids": [], "requested": 0, "selected": 0, "scanned_rows": 0}
675
+
676
+ column_key = _resolve_column_key(
677
+ table_id=table_id,
678
+ column_id=column_id,
679
+ base_url=base_url,
680
+ token=token,
681
+ use_x_api_key=use_x_api_key,
682
+ timeout=timeout,
683
+ verbose=verbose,
684
+ )
685
+
686
+ selected: List[str] = []
687
+ seen: set[str] = set()
688
+ scanned_rows = 0
689
+ page_count = 0
690
+ cursor: Optional[str] = None
691
+
692
+ effective_page_size = max(1, min(int(page_size or 200), 1000))
693
+ filters_payload = filters if isinstance(filters, dict) else None
694
+
695
+ while len(selected) < count:
696
+ page_count += 1
697
+ params: Dict[str, Any] = {"page_size": effective_page_size}
698
+ if cursor:
699
+ params["cursor"] = cursor
700
+ if filters_payload:
701
+ params["filters"] = json.dumps(filters_payload, separators=(",", ":"))
702
+
703
+ page = _request_api(
704
+ "GET",
705
+ f"/api/tables/{table_id}/rows",
706
+ base_url=base_url,
707
+ token=token,
708
+ use_x_api_key=use_x_api_key,
709
+ params=params,
710
+ timeout=timeout,
711
+ verbose=verbose,
712
+ )
713
+ if not isinstance(page, dict):
714
+ print(f"ERROR: unexpected rows response: {page}", file=sys.stderr)
715
+ sys.exit(1)
716
+
717
+ rows = page.get("rows") or []
718
+ if not isinstance(rows, list):
719
+ print(f"ERROR: rows payload is not a list: {type(rows).__name__}", file=sys.stderr)
720
+ sys.exit(1)
721
+
722
+ for row in rows:
723
+ if not isinstance(row, dict):
724
+ continue
725
+ scanned_rows += 1
726
+ row_id = str(row.get("_id") or row.get("id") or row.get("rowId") or "").strip()
727
+ if not row_id or row_id in seen:
728
+ continue
729
+ seen.add(row_id)
730
+
731
+ if unprocessed_only and _is_processed_cell_value(row.get(column_key)):
732
+ continue
733
+
734
+ selected.append(row_id)
735
+ if len(selected) >= count:
736
+ break
737
+
738
+ has_more = bool(page.get("hasMore") if "hasMore" in page else page.get("has_more"))
739
+ next_cursor = page.get("nextCursor") if page.get("nextCursor") is not None else page.get("next_cursor")
740
+ if len(selected) >= count:
741
+ break
742
+ if not has_more or not next_cursor:
743
+ break
744
+ cursor = str(next_cursor)
745
+
746
+ return {
747
+ "row_ids": selected,
748
+ "requested": int(count),
749
+ "selected": len(selected),
750
+ "scanned_rows": scanned_rows,
751
+ "pages_scanned": page_count,
752
+ "column_key": column_key,
753
+ "used_filters": bool(filters_payload),
754
+ "unprocessed_only": bool(unprocessed_only),
755
+ }
756
+
757
+
758
+ def _execute_run_flow(
759
+ *,
760
+ args: argparse.Namespace,
761
+ token: str,
762
+ payload: Dict[str, Any],
763
+ context: Optional[Dict[str, Any]] = None,
764
+ ) -> None:
765
+ estimate_data: Optional[Dict[str, Any]] = None
766
+ should_estimate = bool(args.show_estimate or args.max_credits is not None or args.dry_run)
767
+ if should_estimate:
768
+ estimate_raw = _request_api(
769
+ "POST",
770
+ f"/api/tables/{args.table_id}/columns/{args.column_id}/estimate",
771
+ base_url=args.base_url,
772
+ token=token,
773
+ use_x_api_key=args.use_x_api_key,
774
+ payload=payload,
775
+ timeout=args.timeout,
776
+ verbose=args.verbose,
777
+ )
778
+ if not isinstance(estimate_raw, dict):
779
+ print(f"ERROR: unexpected estimate response: {estimate_raw}", file=sys.stderr)
780
+ sys.exit(1)
781
+ estimate_data = estimate_raw
782
+
783
+ if args.max_credits is not None:
784
+ if estimate_data is None:
785
+ print("ERROR: failed to compute estimate for --max-credits guard", file=sys.stderr)
786
+ sys.exit(1)
787
+ limit = float(args.max_credits)
788
+ estimated_max = estimate_data.get("estimated_credits_max")
789
+ estimated_min = float(estimate_data.get("estimated_credits_min") or 0.0)
790
+
791
+ if estimated_max is None and not bool(args.allow_unknown_max):
792
+ output = {
793
+ "blocked": True,
794
+ "reason": "estimated_credits_max is unknown; pass --allow-unknown-max to proceed",
795
+ "max_credits_limit": limit,
796
+ "estimate": estimate_data,
797
+ }
798
+ if context is not None:
799
+ output["context"] = context
800
+ _print_json(output, compact=args.compact)
801
+ sys.exit(3)
802
+
803
+ compare_value = float(estimated_max if estimated_max is not None else estimated_min)
804
+ if compare_value > limit:
805
+ output = {
806
+ "blocked": True,
807
+ "reason": "estimated credits exceed max-credits limit",
808
+ "max_credits_limit": limit,
809
+ "estimate_compare_value": compare_value,
810
+ "estimate": estimate_data,
811
+ }
812
+ if context is not None:
813
+ output["context"] = context
814
+ _print_json(output, compact=args.compact)
815
+ sys.exit(3)
816
+
817
+ if args.dry_run:
818
+ output = {
819
+ "dry_run": True,
820
+ "run_payload": payload,
821
+ "estimate": estimate_data,
822
+ }
823
+ if context is not None:
824
+ output["context"] = context
825
+ _print_json(output, compact=args.compact)
826
+ return
827
+
828
+ run_data = _request_api(
829
+ "POST",
830
+ f"/api/tables/{args.table_id}/columns/{args.column_id}/run",
831
+ base_url=args.base_url,
832
+ token=token,
833
+ use_x_api_key=args.use_x_api_key,
834
+ payload=payload,
835
+ timeout=args.timeout,
836
+ verbose=args.verbose,
837
+ )
838
+ if not isinstance(run_data, dict):
839
+ output_non_dict: Any = run_data
840
+ if context is not None:
841
+ output_non_dict = {"context": context, "run": run_data}
842
+ _print_json(output_non_dict, compact=args.compact)
843
+ return
844
+
845
+ if args.wait:
846
+ job_id = run_data.get("job_id") or run_data.get("jobId")
847
+ if not job_id:
848
+ print("ERROR: run response missing job_id; cannot wait", file=sys.stderr)
849
+ output = run_data if context is None else {"context": context, "run": run_data}
850
+ _print_json(output, compact=args.compact)
851
+ sys.exit(1)
852
+ if not args.quiet_wait:
853
+ _print_json(
854
+ {
855
+ "job_id": str(job_id),
856
+ "status": "polling_started",
857
+ "hint": "polling /api/bulk-jobs/{job_id}",
858
+ },
859
+ compact=args.compact,
860
+ )
861
+
862
+ poll_result = _poll_job(
863
+ job_id=str(job_id),
864
+ base_url=args.base_url,
865
+ token=token,
866
+ use_x_api_key=args.use_x_api_key,
867
+ interval_seconds=int(args.poll_interval or 2),
868
+ wait_timeout_seconds=int(args.wait_timeout or 0),
869
+ request_timeout_seconds=int(args.timeout or DEFAULT_TIMEOUT_SECONDS),
870
+ verbose=args.verbose,
871
+ compact=args.compact,
872
+ once=False,
873
+ print_updates=not args.quiet_wait,
874
+ )
875
+ final_job = poll_result.get("job") or {}
876
+ timed_out = bool(poll_result.get("timed_out"))
877
+ output = {
878
+ "run": run_data,
879
+ "estimate": estimate_data if args.show_estimate or args.max_credits is not None else None,
880
+ "final_job": final_job,
881
+ "timed_out": timed_out,
882
+ "polls": int(poll_result.get("polls") or 0),
883
+ }
884
+ if context is not None:
885
+ output["context"] = context
886
+ _print_json(output, compact=args.compact)
887
+
888
+ if timed_out:
889
+ sys.exit(4)
890
+ final_status = str((final_job or {}).get("status") or "").lower()
891
+ if args.fail_on_error and final_status in {"error", "cancelled"}:
892
+ sys.exit(1)
893
+ if args.fail_on_partial and final_status == "partial":
894
+ sys.exit(1)
895
+ return
896
+
897
+ output_any: Any = run_data
898
+ if estimate_data is not None and args.show_estimate:
899
+ output_any = {"estimate": estimate_data, "run": run_data}
900
+ if context is not None:
901
+ output_any = {"context": context, "result": output_any}
902
+ _print_json(output_any, compact=args.compact)
903
+
904
+
607
905
  def _create_rows_and_patch_records(
608
906
  *,
609
907
  table_id: str,
@@ -1720,132 +2018,61 @@ def cmd_columns_projections(args: argparse.Namespace) -> None:
1720
2018
  def cmd_columns_run(args: argparse.Namespace) -> None:
1721
2019
  token = _resolve_token(args.token, required=True)
1722
2020
  payload = _normalize_run_payload(args)
1723
- estimate_data: Optional[Dict[str, Any]] = None
1724
- should_estimate = bool(args.show_estimate or args.max_credits is not None or args.dry_run)
1725
- if should_estimate:
1726
- estimate_raw = _request_api(
1727
- "POST",
1728
- f"/api/tables/{args.table_id}/columns/{args.column_id}/estimate",
1729
- base_url=args.base_url,
1730
- token=token,
1731
- use_x_api_key=args.use_x_api_key,
1732
- payload=payload,
1733
- timeout=args.timeout,
1734
- verbose=args.verbose,
1735
- )
1736
- if not isinstance(estimate_raw, dict):
1737
- print(f"ERROR: unexpected estimate response: {estimate_raw}", file=sys.stderr)
1738
- sys.exit(1)
1739
- estimate_data = estimate_raw
2021
+ _execute_run_flow(args=args, token=token, payload=payload, context=None)
1740
2022
 
1741
- if args.max_credits is not None:
1742
- if estimate_data is None:
1743
- print("ERROR: failed to compute estimate for --max-credits guard", file=sys.stderr)
1744
- sys.exit(1)
1745
- limit = float(args.max_credits)
1746
- estimated_max = estimate_data.get("estimated_credits_max")
1747
- estimated_min = float(estimate_data.get("estimated_credits_min") or 0.0)
1748
2023
 
1749
- if estimated_max is None and not bool(args.allow_unknown_max):
1750
- output = {
1751
- "blocked": True,
1752
- "reason": "estimated_credits_max is unknown; pass --allow-unknown-max to proceed",
1753
- "max_credits_limit": limit,
1754
- "estimate": estimate_data,
1755
- }
1756
- _print_json(output, compact=args.compact)
1757
- sys.exit(3)
1758
-
1759
- compare_value = float(estimated_max if estimated_max is not None else estimated_min)
1760
- if compare_value > limit:
1761
- output = {
1762
- "blocked": True,
1763
- "reason": "estimated credits exceed max-credits limit",
1764
- "max_credits_limit": limit,
1765
- "estimate_compare_value": compare_value,
1766
- "estimate": estimate_data,
1767
- }
1768
- _print_json(output, compact=args.compact)
1769
- sys.exit(3)
2024
+ def cmd_columns_run_next(args: argparse.Namespace) -> None:
2025
+ token = _resolve_token(args.token, required=True)
2026
+ requested_count = int(args.count or 0)
2027
+ if requested_count <= 0:
2028
+ print("ERROR: --count must be > 0", file=sys.stderr)
2029
+ sys.exit(2)
1770
2030
 
1771
- if args.dry_run:
1772
- _print_json(
1773
- {
1774
- "dry_run": True,
1775
- "run_payload": payload,
1776
- "estimate": estimate_data,
1777
- },
1778
- compact=args.compact,
1779
- )
1780
- return
2031
+ filters = _load_json_input(
2032
+ inline_json=getattr(args, "filters_json", None),
2033
+ file_path=getattr(args, "filters_file", None),
2034
+ context="filters",
2035
+ default=None,
2036
+ )
2037
+ if filters is not None and not isinstance(filters, dict):
2038
+ print("ERROR: filters payload must be a JSON object", file=sys.stderr)
2039
+ sys.exit(2)
1781
2040
 
1782
- run_data = _request_api(
1783
- "POST",
1784
- f"/api/tables/{args.table_id}/columns/{args.column_id}/run",
2041
+ selection = _select_next_row_ids(
2042
+ table_id=args.table_id,
2043
+ column_id=args.column_id,
2044
+ count=requested_count,
2045
+ filters=filters,
2046
+ unprocessed_only=bool(args.unprocessed_only),
2047
+ page_size=int(args.page_size or 200),
1785
2048
  base_url=args.base_url,
1786
2049
  token=token,
1787
2050
  use_x_api_key=args.use_x_api_key,
1788
- payload=payload,
1789
2051
  timeout=args.timeout,
1790
2052
  verbose=args.verbose,
1791
2053
  )
1792
- if not isinstance(run_data, dict):
1793
- _print_json(run_data, compact=args.compact)
1794
- return
1795
-
1796
- if args.wait:
1797
- job_id = run_data.get("job_id") or run_data.get("jobId")
1798
- if not job_id:
1799
- print("ERROR: run response missing job_id; cannot wait", file=sys.stderr)
1800
- _print_json(run_data, compact=args.compact)
1801
- sys.exit(1)
1802
- if not args.quiet_wait:
1803
- _print_json(
1804
- {
1805
- "job_id": str(job_id),
1806
- "status": "polling_started",
1807
- "hint": "polling /api/bulk-jobs/{job_id}",
1808
- },
1809
- compact=args.compact,
1810
- )
1811
-
1812
- poll_result = _poll_job(
1813
- job_id=str(job_id),
1814
- base_url=args.base_url,
1815
- token=token,
1816
- use_x_api_key=args.use_x_api_key,
1817
- interval_seconds=int(args.poll_interval or 2),
1818
- wait_timeout_seconds=int(args.wait_timeout or 0),
1819
- request_timeout_seconds=int(args.timeout or DEFAULT_TIMEOUT_SECONDS),
1820
- verbose=args.verbose,
1821
- compact=args.compact,
1822
- once=False,
1823
- print_updates=not args.quiet_wait,
1824
- )
1825
- final_job = poll_result.get("job") or {}
1826
- timed_out = bool(poll_result.get("timed_out"))
2054
+ row_ids = selection.get("row_ids") or []
2055
+ if not row_ids:
1827
2056
  output = {
1828
- "run": run_data,
1829
- "estimate": estimate_data if args.show_estimate or args.max_credits is not None else None,
1830
- "final_job": final_job,
1831
- "timed_out": timed_out,
1832
- "polls": int(poll_result.get("polls") or 0),
2057
+ "queued": False,
2058
+ "reason": "no eligible rows found for run-next selection",
2059
+ "selection": selection,
1833
2060
  }
1834
2061
  _print_json(output, compact=args.compact)
1835
-
1836
- if timed_out:
1837
- sys.exit(4)
1838
- final_status = str((final_job or {}).get("status") or "").lower()
1839
- if args.fail_on_error and final_status in {"error", "cancelled"}:
1840
- sys.exit(1)
1841
- if args.fail_on_partial and final_status == "partial":
1842
- sys.exit(1)
2062
+ if args.fail_if_empty:
2063
+ sys.exit(3)
1843
2064
  return
1844
2065
 
1845
- output: Any = run_data
1846
- if estimate_data is not None and args.show_estimate:
1847
- output = {"estimate": estimate_data, "run": run_data}
1848
- _print_json(output, compact=args.compact)
2066
+ payload: Dict[str, Any] = {"scope": "subset", "rowIds": row_ids}
2067
+ if args.unprocessed_only:
2068
+ payload["unprocessedOnly"] = True
2069
+
2070
+ context = {
2071
+ "mode": "run-next",
2072
+ "selection": {k: v for k, v in selection.items() if k != "row_ids"},
2073
+ "row_ids": row_ids,
2074
+ }
2075
+ _execute_run_flow(args=args, token=token, payload=payload, context=context)
1849
2076
 
1850
2077
 
1851
2078
  def cmd_columns_estimate(args: argparse.Namespace) -> None:
@@ -2272,6 +2499,25 @@ def build_parser() -> argparse.ArgumentParser:
2272
2499
  _add_api_common_arguments(pcr)
2273
2500
  pcr.set_defaults(func=cmd_columns_run)
2274
2501
 
2502
+ pcrn = col_sub.add_parser("run-next", help="Run exactly N selected rows using subset scope")
2503
+ pcrn.add_argument("--table-id", required=True)
2504
+ pcrn.add_argument("--column-id", required=True)
2505
+ pcrn.add_argument("--count", type=int, required=True, help="Exact number of rows to queue")
2506
+ pcrn.add_argument("--filters-json", help="Optional JSON object to select from filtered rows")
2507
+ pcrn.add_argument("--filters-file", help="Optional JSON file to select from filtered rows")
2508
+ pcrn.add_argument("--page-size", type=int, default=200, help="Rows page size while selecting candidates (max 1000)")
2509
+ pcrn.add_argument(
2510
+ "--include-processed",
2511
+ dest="unprocessed_only",
2512
+ action="store_false",
2513
+ help="Include rows that already have output in candidate selection",
2514
+ )
2515
+ pcrn.add_argument("--fail-if-empty", action="store_true", help="Exit non-zero when no eligible rows are selected")
2516
+ pcrn.set_defaults(unprocessed_only=True)
2517
+ _add_run_execution_arguments(pcrn)
2518
+ _add_api_common_arguments(pcrn)
2519
+ pcrn.set_defaults(func=cmd_columns_run_next)
2520
+
2275
2521
  pce = col_sub.add_parser("estimate", help="Estimate a column run")
2276
2522
  pce.add_argument("--table-id", required=True)
2277
2523
  pce.add_argument("--column-id", required=True)
@@ -2340,6 +2586,25 @@ def build_parser() -> argparse.ArgumentParser:
2340
2586
  _add_api_common_arguments(palias_run)
2341
2587
  palias_run.set_defaults(func=cmd_columns_run)
2342
2588
 
2589
+ palias_run_next = sub.add_parser("run-next", help="Alias for: columns run-next")
2590
+ palias_run_next.add_argument("--table-id", required=True)
2591
+ palias_run_next.add_argument("--column-id", required=True)
2592
+ palias_run_next.add_argument("--count", type=int, required=True, help="Exact number of rows to queue")
2593
+ palias_run_next.add_argument("--filters-json", help="Optional JSON object to select from filtered rows")
2594
+ palias_run_next.add_argument("--filters-file", help="Optional JSON file to select from filtered rows")
2595
+ palias_run_next.add_argument("--page-size", type=int, default=200, help="Rows page size while selecting candidates (max 1000)")
2596
+ palias_run_next.add_argument(
2597
+ "--include-processed",
2598
+ dest="unprocessed_only",
2599
+ action="store_false",
2600
+ help="Include rows that already have output in candidate selection",
2601
+ )
2602
+ palias_run_next.add_argument("--fail-if-empty", action="store_true", help="Exit non-zero when no eligible rows are selected")
2603
+ palias_run_next.set_defaults(unprocessed_only=True)
2604
+ _add_run_execution_arguments(palias_run_next)
2605
+ _add_api_common_arguments(palias_run_next)
2606
+ palias_run_next.set_defaults(func=cmd_columns_run_next)
2607
+
2343
2608
  # status
2344
2609
  ps = sub.add_parser("status", help="Show pending/done/error counts for a table/column (Mongo)")
2345
2610
  ps.add_argument("--table-id", required=True)
Files 6–61 in the list above: no content changes (+0 -0).