open-research-protocol 0.4.30 → 0.4.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -103,8 +103,10 @@ artifact paths (code/data/proofs/logs/papers).
103
103
  - Treat **failed paths** as assets: record dead ends as a `Failed Path Record` with the blocking reason/counterexample and a
104
104
  next hook.
105
105
  - Resolve disputes by **verification or downgrade**, not argument.
106
- - Run `orp hygiene --json` before long delegation, after material writeback, before API/remote/paid compute, and when dirty
107
- state grows unexpectedly.
106
+ - Run `orp hygiene --json` before long delegation, after material writeback, before remote side effects or unbudgeted paid
107
+ compute, and when dirty state grows unexpectedly.
108
+ - Do not hard-stop solely because an OpenAI research lane is paid; budgeted ORP research may run when `orp research` spend
109
+ preflight is within the configured daily cap.
108
110
  - Stop long-running expansion while hygiene reports `dirty_unclassified`; classify, refresh generated surfaces, canonicalize
109
111
  useful scratch, or write a blocker before continuing.
110
112
  - Hygiene is non-destructive: never reset, checkout, or delete files merely to hide dirty state.
package/CHANGELOG.md CHANGED
@@ -6,6 +6,32 @@ There was no prior in-repo changelog file, so the first formal entry starts
6
6
  with the currently shipped `v0.4.4` release and summarizes the full release
7
7
  delta reflected in this repo.
8
8
 
9
+ ## v0.4.32 - 2026-04-25
10
+
11
+ This release clarifies ORP's paid-work boundary so budgeted OpenAI research is
12
+ not treated as a hard stop solely because it uses paid API calls.
13
+
14
+ ### Changed
15
+
16
+ - Built-in OpenAI research lanes now require a local spend policy, then use
17
+ spend preflight as the approval boundary for budgeted provider calls.
18
+ - Generated project context, AGENTS guidance, handoffs, and research docs now
19
+ distinguish budgeted ORP research from unbudgeted paid compute, purchases,
20
+ and cap-exceeded provider calls.
21
+
22
+ ## v0.4.31 - 2026-04-25
23
+
24
+ This release refreshes ORP's OpenAI-backed research lanes and tightens
25
+ workspace tab recency ranking for grouped project sessions.
26
+
27
+ ### Changed
28
+
29
+ - Updated built-in OpenAI research profiles to use `gpt-5.5` for high-reasoning,
30
+ web synthesis, and pro-research-style lanes, with Responses API `web_search`
31
+ and `xhigh` reasoning on deep research passes.
32
+ - Workspace tab reports now rank grouped Codex project tabs by the freshest
33
+ tracked session update time while keeping same-project sessions together.
34
+
9
35
  ## v0.4.30 - 2026-04-25
10
36
 
11
37
  This release tightens ORP-managed Codex session tracking so short-lived
package/cli/orp.py CHANGED
@@ -141,9 +141,16 @@ FRONTIER_TERMINAL_STATUSES = {"complete", "completed", "done", "skipped", "termi
141
141
  YOUTUBE_SOURCE_SCHEMA_VERSION = "1.0.0"
142
142
  EXCHANGE_REPORT_SCHEMA_VERSION = "1.0.0"
143
143
  RESEARCH_RUN_SCHEMA_VERSION = "1.0.0"
144
+ OPENAI_RESEARCH_MODEL = "gpt-5.5"
145
+ OPENAI_DEEP_RESEARCH_MODEL = OPENAI_RESEARCH_MODEL
144
146
  SECRET_SPEND_POLICY_SCHEMA_VERSION = "1.0.0"
145
147
  RESEARCH_SPEND_LEDGER_SCHEMA_VERSION = "1.0.0"
146
148
  PROJECT_CONTEXT_SCHEMA_VERSION = "1.0.0"
149
+ HYGIENE_REMOTE_SPEND_MOMENT = "before remote side effects or unbudgeted paid compute"
150
+ BUDGETED_RESEARCH_SPEND_RULE = (
151
+ "Do not hard-stop solely because an OpenAI research lane is paid; budgeted ORP research may run when "
152
+ "`orp research` spend preflight is within the configured daily cap."
153
+ )
147
154
  HYGIENE_POLICY_SCHEMA_VERSION = "1.0.0"
148
155
  MAINTENANCE_STATE_SCHEMA_VERSION = "1.0.0"
149
156
  SCHEDULE_REGISTRY_SCHEMA_VERSION = "1.0.0"
@@ -6919,7 +6926,7 @@ def _default_hygiene_policy() -> dict[str, Any]:
6919
6926
  run_moments = [
6920
6927
  "before long delegation",
6921
6928
  "after material writeback",
6922
- "before API/remote/paid compute",
6929
+ HYGIENE_REMOTE_SPEND_MOMENT,
6923
6930
  "when dirty state grows unexpectedly",
6924
6931
  ]
6925
6932
  self_healing_policy = [
@@ -10661,23 +10668,23 @@ def _project_research_trigger_policy() -> dict[str, Any]:
10661
10668
  "moment_id": "thinking_reasoning_high",
10662
10669
  "calls_api": True,
10663
10670
  "lane": "openai_reasoning_high",
10664
- "model": "gpt-5.4",
10671
+ "model": OPENAI_RESEARCH_MODEL,
10665
10672
  "when": "Use when the directory has a decision gate, route choice, proof strategy, architecture tradeoff, or ambiguous next action.",
10666
10673
  },
10667
10674
  {
10668
10675
  "moment_id": "web_synthesis",
10669
10676
  "calls_api": True,
10670
10677
  "lane": "openai_web_synthesis",
10671
- "model": "gpt-5.4",
10678
+ "model": OPENAI_RESEARCH_MODEL,
10672
10679
  "when": "Use when the answer depends on current public facts, external docs, papers, project status, or citations.",
10673
10680
  },
10674
10681
  {
10675
10682
  "moment_id": "pro_deep_research",
10676
10683
  "calls_api": True,
10677
10684
  "lane": "openai_deep_research",
10678
- "model": "o3-deep-research-2025-06-26",
10685
+ "model": OPENAI_DEEP_RESEARCH_MODEL,
10679
10686
  "when": "Use only after reasoning/web lanes expose a research-heavy gap, disagreement, source-quality issue, or literature-scale synthesis need.",
10680
- "capability_note": "Requires an OpenAI organization verified for Deep Research model access.",
10687
+ "capability_note": "Runs GPT-5.5 with background mode, web search, and xhigh reasoning for a pro-research-style pass.",
10681
10688
  },
10682
10689
  ],
10683
10690
  "skip_research_when": [
@@ -10690,6 +10697,11 @@ def _project_research_trigger_policy() -> dict[str, Any]:
10690
10697
  "the project must compare multiple papers, standards, providers, or public claims",
10691
10698
  "the output needs a citation-rich report rather than a short decision memo",
10692
10699
  ],
10700
+ "spend_policy": {
10701
+ "budgeted_provider_calls": "OpenAI research lanes are paid but allowed when executed through ORP with a configured local spend policy and a passing spend preflight.",
10702
+ "hard_stop_boundary": "Stop for missing required spend policy, missing secret, cap-exceeded preflight, unbudgeted provider spend, purchases, or non-ORP paid compute.",
10703
+ "local_enforcement": "keychain spend policy with local_preflight_reservation",
10704
+ },
10693
10705
  }
10694
10706
 
10695
10707
 
@@ -10709,7 +10721,7 @@ def _project_evolution_policy() -> dict[str, Any]:
10709
10721
  "run_moments": [
10710
10722
  "before long delegation",
10711
10723
  "after material writeback",
10712
- "before API/remote/paid compute",
10724
+ HYGIENE_REMOTE_SPEND_MOMENT,
10713
10725
  "when dirty state grows unexpectedly",
10714
10726
  ],
10715
10727
  "stop_rule": (
@@ -10718,10 +10730,11 @@ def _project_evolution_policy() -> dict[str, Any]:
10718
10730
  "or write a blocker first."
10719
10731
  ),
10720
10732
  "self_healing_rule": "Non-destructive by default: never reset, checkout, or delete files merely to hide dirty state.",
10733
+ "budgeted_research_spend_rule": BUDGETED_RESEARCH_SPEND_RULE,
10721
10734
  },
10722
10735
  "evolution_loop": [
10723
10736
  "scan authority surfaces",
10724
- "run worktree hygiene before expansion or remote spend",
10737
+ "run worktree hygiene before expansion, remote side effects, or unbudgeted spend",
10725
10738
  "classify dirty state as canonical, runtime, source/test, docs, scratch, or blocker",
10726
10739
  "classify what is local, public, executable, or human-gated",
10727
10740
  "choose whether reasoning, web synthesis, or deep research is justified",
@@ -10762,9 +10775,10 @@ def _project_context_payload(repo_root: Path, *, source: str) -> dict[str, Any]:
10762
10775
  "run_moments": [
10763
10776
  "before long delegation",
10764
10777
  "after material writeback",
10765
- "before API/remote/paid compute",
10778
+ HYGIENE_REMOTE_SPEND_MOMENT,
10766
10779
  "when dirty state grows unexpectedly",
10767
10780
  ],
10781
+ "budgeted_research_spend_rule": BUDGETED_RESEARCH_SPEND_RULE,
10768
10782
  },
10769
10783
  "evolution_policy": _project_evolution_policy(),
10770
10784
  "next_actions": [
@@ -10778,6 +10792,7 @@ def _project_context_payload(repo_root: Path, *, source: str) -> dict[str, Any]:
10778
10792
  "This file is ORP process context for the local directory.",
10779
10793
  "It is refreshed as the project evolves and should not be cited as proof or canonical evidence.",
10780
10794
  "Provider research calls remain opt-in through `orp research ask --execute`.",
10795
+ BUDGETED_RESEARCH_SPEND_RULE,
10781
10796
  ],
10782
10797
  }
10783
10798
 
@@ -10910,7 +10925,8 @@ def _init_handoff_template(repo_root: Path, *, default_branch: str, initialized_
10910
10925
  "## Agent Rules\n\n"
10911
10926
  f"- Do not do meaningful implementation work directly on `{default_branch}` unless explicitly allowed.\n"
10912
10927
  "- Create a work branch before substantial edits.\n"
10913
- "- Run `orp hygiene --json` before long delegation, after material writeback, before API/remote/paid compute, and when dirty state grows unexpectedly.\n"
10928
+ f"- Run `orp hygiene --json` before long delegation, after material writeback, {HYGIENE_REMOTE_SPEND_MOMENT}, and when dirty state grows unexpectedly.\n"
10929
+ f"- {BUDGETED_RESEARCH_SPEND_RULE}\n"
10914
10930
  "- Stop long-running expansion while hygiene reports `dirty_unclassified`; classify, refresh generated surfaces, canonicalize useful scratch, or write a blocker.\n"
10915
10931
  "- Hygiene is non-destructive: never reset, checkout, or delete files merely to hide dirty state.\n"
10916
10932
  "- Create a checkpoint commit after each meaningful completed unit of work.\n"
@@ -11096,7 +11112,8 @@ def _render_agent_guide_block(
11096
11112
  [
11097
11113
  "- Preserve human notes outside ORP-managed blocks.",
11098
11114
  "- Use this local file for the project-specific current state, local constraints, and concrete next moves.",
11099
- "- Run `orp hygiene --json` before long delegation, after material writeback, before API/remote/paid compute, and when dirty state grows unexpectedly.",
11115
+ f"- Run `orp hygiene --json` before long delegation, after material writeback, {HYGIENE_REMOTE_SPEND_MOMENT}, and when dirty state grows unexpectedly.",
11116
+ f"- {BUDGETED_RESEARCH_SPEND_RULE}",
11100
11117
  "- Stop long-running expansion while hygiene reports `dirty_unclassified`; classify, refresh generated surfaces, canonicalize useful scratch, or write a blocker.",
11101
11118
  "- Hygiene is non-destructive: never reset, checkout, or delete files merely to hide dirty state.",
11102
11119
  ]
@@ -11514,9 +11531,10 @@ def _agent_policy_payload(
11514
11531
  "run_moments": [
11515
11532
  "before long delegation",
11516
11533
  "after material writeback",
11517
- "before API/remote/paid compute",
11534
+ HYGIENE_REMOTE_SPEND_MOMENT,
11518
11535
  "when dirty state grows unexpectedly",
11519
11536
  ],
11537
+ "budgeted_research_spend_rule": BUDGETED_RESEARCH_SPEND_RULE,
11520
11538
  "required_self_healing": [
11521
11539
  "classify dirty paths",
11522
11540
  "refresh generated surfaces",
@@ -17656,7 +17674,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17656
17674
  "calls_api": True,
17657
17675
  "secret_alias": "openai-primary",
17658
17676
  "env_var": "OPENAI_API_KEY",
17659
- "description": "Call GPT-5.4 with high reasoning to critique and compress the opening research.",
17677
+ "description": f"Call {OPENAI_RESEARCH_MODEL} with high reasoning to critique and compress the opening research.",
17660
17678
  },
17661
17679
  {
17662
17680
  "moment_id": "think_web_crosscheck",
@@ -17664,7 +17682,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17664
17682
  "calls_api": True,
17665
17683
  "secret_alias": "openai-primary",
17666
17684
  "env_var": "OPENAI_API_KEY",
17667
- "description": "Call GPT-5.4 with high reasoning and web search to verify recency-sensitive claims.",
17685
+ "description": f"Call {OPENAI_RESEARCH_MODEL} with high reasoning and web search to verify recency-sensitive claims.",
17668
17686
  },
17669
17687
  {
17670
17688
  "moment_id": "think_synthesis",
@@ -17672,7 +17690,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17672
17690
  "calls_api": True,
17673
17691
  "secret_alias": "openai-primary",
17674
17692
  "env_var": "OPENAI_API_KEY",
17675
- "description": "Call GPT-5.4 with high reasoning to resolve disagreements before final research.",
17693
+ "description": f"Call {OPENAI_RESEARCH_MODEL} with high reasoning to resolve disagreements before final research.",
17676
17694
  },
17677
17695
  {
17678
17696
  "moment_id": "final_deep_research",
@@ -17691,7 +17709,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17691
17709
  "call_moment": "opening_deep_research",
17692
17710
  "label": "Opening Deep Research",
17693
17711
  "provider": "openai",
17694
- "model": "o3-deep-research-2025-06-26",
17712
+ "model": OPENAI_DEEP_RESEARCH_MODEL,
17695
17713
  "adapter": "openai_responses",
17696
17714
  "role": (
17697
17715
  "Initial Deep Research scan. Map the landscape, source families, hard unknowns, "
@@ -17712,9 +17730,11 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17712
17730
  ],
17713
17731
  "env_var": "OPENAI_API_KEY",
17714
17732
  "secret_alias": "openai-primary",
17733
+ "spend_policy_required": True,
17734
+ "reasoning_effort": "xhigh",
17715
17735
  "reasoning_summary": "auto",
17716
17736
  "web_search": True,
17717
- "web_search_tool": "web_search_preview",
17737
+ "web_search_tool": "web_search",
17718
17738
  "background": False,
17719
17739
  "spend_reserve_usd": 1.5,
17720
17740
  "max_tool_calls": 40,
@@ -17728,7 +17748,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17728
17748
  "call_moment": "think_after_deep",
17729
17749
  "label": "Think after Deep Research",
17730
17750
  "provider": "openai",
17731
- "model": "gpt-5.4",
17751
+ "model": OPENAI_RESEARCH_MODEL,
17732
17752
  "adapter": "openai_responses",
17733
17753
  "role": (
17734
17754
  "High-reasoning critique of the opening Deep Research output. Compress it into a sharper "
@@ -17748,6 +17768,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17748
17768
  ],
17749
17769
  "env_var": "OPENAI_API_KEY",
17750
17770
  "secret_alias": "openai-primary",
17771
+ "spend_policy_required": True,
17751
17772
  "reasoning_effort": "high",
17752
17773
  "text_verbosity": "medium",
17753
17774
  "spend_reserve_usd": 0.5,
@@ -17761,7 +17782,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17761
17782
  "call_moment": "think_web_crosscheck",
17762
17783
  "label": "Think with web cross-check",
17763
17784
  "provider": "openai",
17764
- "model": "gpt-5.4",
17785
+ "model": OPENAI_RESEARCH_MODEL,
17765
17786
  "adapter": "openai_responses",
17766
17787
  "role": (
17767
17788
  "High-reasoning web-search pass. Verify current facts, citations, public claims, "
@@ -17781,6 +17802,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17781
17802
  ],
17782
17803
  "env_var": "OPENAI_API_KEY",
17783
17804
  "secret_alias": "openai-primary",
17805
+ "spend_policy_required": True,
17784
17806
  "reasoning_effort": "high",
17785
17807
  "text_verbosity": "medium",
17786
17808
  "web_search": True,
@@ -17799,7 +17821,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17799
17821
  "call_moment": "think_synthesis",
17800
17822
  "label": "Synthesis thinking pass",
17801
17823
  "provider": "openai",
17802
- "model": "gpt-5.4",
17824
+ "model": OPENAI_RESEARCH_MODEL,
17803
17825
  "adapter": "openai_responses",
17804
17826
  "role": (
17805
17827
  "High-reasoning synthesis pass. Reconcile the deep-research map, critique, and web cross-check "
@@ -17818,6 +17840,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17818
17840
  ],
17819
17841
  "env_var": "OPENAI_API_KEY",
17820
17842
  "secret_alias": "openai-primary",
17843
+ "spend_policy_required": True,
17821
17844
  "reasoning_effort": "high",
17822
17845
  "text_verbosity": "medium",
17823
17846
  "spend_reserve_usd": 0.5,
@@ -17831,7 +17854,7 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17831
17854
  "call_moment": "final_deep_research",
17832
17855
  "label": "Final Deep Research",
17833
17856
  "provider": "openai",
17834
- "model": "o3-deep-research-2025-06-26",
17857
+ "model": OPENAI_DEEP_RESEARCH_MODEL,
17835
17858
  "adapter": "openai_responses",
17836
17859
  "role": (
17837
17860
  "Final Deep Research pass. Use all prior lane outputs to produce the decisive, source-grounded "
@@ -17851,9 +17874,11 @@ def _research_staged_deep_think_profile(profile_id: str = "deep-think-web-think-
17851
17874
  ],
17852
17875
  "env_var": "OPENAI_API_KEY",
17853
17876
  "secret_alias": "openai-primary",
17877
+ "spend_policy_required": True,
17878
+ "reasoning_effort": "xhigh",
17854
17879
  "reasoning_summary": "auto",
17855
17880
  "web_search": True,
17856
- "web_search_tool": "web_search_preview",
17881
+ "web_search_tool": "web_search",
17857
17882
  "background": False,
17858
17883
  "spend_reserve_usd": 1.5,
17859
17884
  "max_tool_calls": 40,
@@ -17901,7 +17926,7 @@ def _research_default_profile(profile_id: str = "openai-council") -> dict[str, A
17901
17926
  "calls_api": True,
17902
17927
  "secret_alias": "openai-primary",
17903
17928
  "env_var": "OPENAI_API_KEY",
17904
- "description": "Call GPT-5.4 with high reasoning for the deliberate thinking pass.",
17929
+ "description": f"Call {OPENAI_RESEARCH_MODEL} with high reasoning for the deliberate thinking pass.",
17905
17930
  },
17906
17931
  {
17907
17932
  "moment_id": "web_synthesis",
@@ -17909,7 +17934,7 @@ def _research_default_profile(profile_id: str = "openai-council") -> dict[str, A
17909
17934
  "calls_api": True,
17910
17935
  "secret_alias": "openai-primary",
17911
17936
  "env_var": "OPENAI_API_KEY",
17912
- "description": "Call GPT-5.4 with web search for current public evidence and citations.",
17937
+ "description": f"Call {OPENAI_RESEARCH_MODEL} with web search for current public evidence and citations.",
17913
17938
  },
17914
17939
  {
17915
17940
  "moment_id": "pro_deep_research",
@@ -17926,11 +17951,12 @@ def _research_default_profile(profile_id: str = "openai-council") -> dict[str, A
17926
17951
  "call_moment": "thinking_reasoning_high",
17927
17952
  "label": "OpenAI reasoning high",
17928
17953
  "provider": "openai",
17929
- "model": "gpt-5.4",
17954
+ "model": OPENAI_RESEARCH_MODEL,
17930
17955
  "adapter": "openai_responses",
17931
17956
  "role": "Deliberate high-reasoning pass from the provided context. Think hard, critique assumptions, and produce a decision-oriented answer.",
17932
17957
  "env_var": "OPENAI_API_KEY",
17933
17958
  "secret_alias": "openai-primary",
17959
+ "spend_policy_required": True,
17934
17960
  "reasoning_effort": "high",
17935
17961
  "text_verbosity": "medium",
17936
17962
  "spend_reserve_usd": 0.5,
@@ -17941,11 +17967,12 @@ def _research_default_profile(profile_id: str = "openai-council") -> dict[str, A
17941
17967
  "call_moment": "web_synthesis",
17942
17968
  "label": "OpenAI web synthesis",
17943
17969
  "provider": "openai",
17944
- "model": "gpt-5.4",
17970
+ "model": OPENAI_RESEARCH_MODEL,
17945
17971
  "adapter": "openai_responses",
17946
17972
  "role": "Recency-aware synthesis using OpenAI Responses web search with citations.",
17947
17973
  "env_var": "OPENAI_API_KEY",
17948
17974
  "secret_alias": "openai-primary",
17975
+ "spend_policy_required": True,
17949
17976
  "reasoning_effort": "high",
17950
17977
  "text_verbosity": "medium",
17951
17978
  "web_search": True,
@@ -17961,14 +17988,16 @@ def _research_default_profile(profile_id: str = "openai-council") -> dict[str, A
17961
17988
  "call_moment": "pro_deep_research",
17962
17989
  "label": "OpenAI Pro / Deep Research",
17963
17990
  "provider": "openai",
17964
- "model": "o3-deep-research-2025-06-26",
17991
+ "model": OPENAI_DEEP_RESEARCH_MODEL,
17965
17992
  "adapter": "openai_responses",
17966
17993
  "role": "Pro Research style long-form investigation. Produce a structured, citation-rich report grounded in public sources.",
17967
17994
  "env_var": "OPENAI_API_KEY",
17968
17995
  "secret_alias": "openai-primary",
17996
+ "spend_policy_required": True,
17997
+ "reasoning_effort": "xhigh",
17969
17998
  "reasoning_summary": "auto",
17970
17999
  "web_search": True,
17971
- "web_search_tool": "web_search_preview",
18000
+ "web_search_tool": "web_search",
17972
18001
  "background": True,
17973
18002
  "spend_reserve_usd": 3.5,
17974
18003
  "max_tool_calls": 40,
@@ -18480,6 +18509,7 @@ def _research_openai_spend_preflight(
18480
18509
  provider = str(lane.get("provider", "") or "").strip()
18481
18510
  secret_alias = str(lane.get("secret_alias", "") or "").strip()
18482
18511
  reserve_usd = _research_lane_spend_reserve_usd(lane)
18512
+ spend_policy_required = bool(lane.get("spend_policy_required", False))
18483
18513
  entry, entry_issue = _research_spend_policy_entry_for_lane(lane)
18484
18514
  policy = _normalize_secret_spend_policy(entry.get("spend_policy", {}) if isinstance(entry, dict) else {})
18485
18515
  date_utc = dt.datetime.now(dt.timezone.utc).date().isoformat()
@@ -18493,11 +18523,15 @@ def _research_openai_spend_preflight(
18493
18523
  "ledger_path": str(_research_spend_ledger_path()),
18494
18524
  }
18495
18525
  if not policy:
18526
+ reason = entry_issue or "no spend policy configured for this local keychain entry"
18527
+ if spend_policy_required:
18528
+ reason = f"required spend policy missing: {reason}"
18496
18529
  return {
18497
18530
  **base,
18498
- "allowed": True,
18531
+ "allowed": not spend_policy_required,
18499
18532
  "policy_source": "",
18500
- "reason": entry_issue or "no spend policy configured for this local keychain entry",
18533
+ "spend_policy_required": spend_policy_required,
18534
+ "reason": reason,
18501
18535
  }
18502
18536
 
18503
18537
  reserved_today = _research_spend_ledger_today_total(
@@ -18514,6 +18548,7 @@ def _research_openai_spend_preflight(
18514
18548
  **base,
18515
18549
  "allowed": allowed,
18516
18550
  "policy_source": "keychain",
18551
+ "spend_policy_required": spend_policy_required,
18517
18552
  "daily_cap_usd": round(daily_cap_usd, 6),
18518
18553
  "currency": str(policy.get("currency", "USD")).strip() or "USD",
18519
18554
  "reserved_today_usd": reserved_today,
@@ -18914,7 +18949,7 @@ def _research_run_openai_lane(
18914
18949
  }
18915
18950
 
18916
18951
  body: dict[str, Any] = {
18917
- "model": str(lane.get("model", "gpt-5.4")).strip() or "gpt-5.4",
18952
+ "model": str(lane.get("model", OPENAI_RESEARCH_MODEL)).strip() or OPENAI_RESEARCH_MODEL,
18918
18953
  "input": prompt,
18919
18954
  "background": bool(lane.get("background", False)),
18920
18955
  }
@@ -13,10 +13,15 @@ read:
13
13
  - Read `llms.txt`.
14
14
  - Run `orp about --json`.
15
15
  - Run `orp hygiene --json` before long delegation, after material writeback,
16
- before API/remote/paid compute, and whenever dirty state grows unexpectedly.
16
+ before remote side effects or unbudgeted paid compute, and whenever dirty
17
+ state grows unexpectedly.
17
18
  If it reports `dirty_unclassified`, stop long-running expansion and classify
18
19
  the paths, refresh generated surfaces, canonicalize useful scratch, or write a
19
20
  blocker before continuing.
21
+ - Do not hard-stop solely because an OpenAI research lane is paid. Budgeted ORP
22
+ research may run when `orp research` spend preflight is within the configured
23
+ daily cap; stop for missing required spend policy, missing secret, cap
24
+ exhaustion, purchases, or non-ORP paid compute.
20
25
  - If the task benefits from fresh concepting, tasteful interface work, or
21
26
  exploratory reframing, run:
22
27
  - `orp mode nudge sleek-minimal-progressive --json`
@@ -91,7 +91,7 @@ A true gate is not "the agent feels uncertain."
91
91
 
92
92
  A true gate is a boundary like:
93
93
 
94
- - spend or purchase
94
+ - unbudgeted spend or purchase
95
95
  - outreach or counterparty contact
96
96
  - provider/vendor selection with real consequences
97
97
  - legal/oversight/compliance judgment
@@ -196,7 +196,7 @@ Bad candidates for automatic compilation:
196
196
 
197
197
  - vague strategic narratives with no runnable command
198
198
  - tasks that imply counterparty contact
199
- - tasks that imply money
199
+ - tasks that imply unbudgeted money
200
200
  - steps that promote support-only outputs into authority
201
201
 
202
202
  ## What ORP Should Emit
@@ -230,6 +230,7 @@ The controller benchmark experiment surfaced the exact shape:
230
230
  2. compile the remaining pre-outreach tasks
231
231
  3. keep drafts unsent
232
232
  4. stop only when the next step would actually contact a counterparty or spend
233
+ outside a configured budget/preflight policy
233
234
  5. emit a gate dossier
234
235
  6. resume only after the human opens that gate
235
236
 
@@ -22,11 +22,11 @@ orp research ask "Where should this system live?" --execute --json
22
22
 
23
23
  The built-in `openai-council` profile defines three OpenAI API lanes:
24
24
 
25
- - `openai_reasoning_high`: `gpt-5.4` with `reasoning.effort=high` for the deliberate thinking pass.
26
- - `openai_web_synthesis`: `gpt-5.4` with high reasoning plus Responses API web search for current public evidence and citations.
27
- - `openai_deep_research`: `o3-deep-research-2025-06-26` with background execution and web search preview for Pro/Deep Research style investigation.
25
+ - `openai_reasoning_high`: `gpt-5.5` with `reasoning.effort=high` for the deliberate thinking pass.
26
+ - `openai_web_synthesis`: `gpt-5.5` with high reasoning plus Responses API web search for current public evidence and citations.
27
+ - `openai_deep_research`: `gpt-5.5` with `reasoning.effort=xhigh`, background execution, and Responses API web search for Pro/Deep Research style investigation.
28
28
 
29
- This follows OpenAI's current model guidance: `gpt-5.4` is the default for general-purpose, coding, reasoning, and agentic workflows; web search is enabled through the Responses API `tools` array when current information is needed; and Deep Research is available through the Responses endpoint with `o3-deep-research-2025-06-26`.
29
+ This follows OpenAI's current model guidance: `gpt-5.5` works best through the Responses API for reasoning and tool workflows; web search is enabled through the Responses API `tools` array when current information is needed; and deeper research-style work should use higher reasoning effort plus background mode.
30
30
 
31
31
  ## Staged Deep Research Template
32
32
 
@@ -102,6 +102,26 @@ printf '%s' '<openai-key>' | orp secrets keychain-add \
102
102
  --json
103
103
  ```
104
104
 
105
+ ## Spend Policy
106
+
107
+ The OpenAI research lanes are paid, but paid does not automatically mean a human
108
+ hard stop. ORP treats them as budgeted provider calls when `openai-primary` has
109
+ a local spend policy and the lane passes spend preflight.
110
+
111
+ Set or update the local daily cap metadata like this:
112
+
113
+ ```bash
114
+ orp secrets keychain-spend-policy openai-primary \
115
+ --daily-spend-cap-usd 5 \
116
+ --dashboard-spend-cap-status unconfirmed \
117
+ --dashboard-url https://platform.openai.com/settings/organization/limits \
118
+ --json
119
+ ```
120
+
121
+ Live research should stop when the required spend policy is missing, the secret
122
+ is missing, the daily cap would be exceeded, or the work is unbudgeted provider
123
+ spend outside ORP research lanes.
124
+
105
125
  ## Fixtures
106
126
 
107
127
  Provider outputs can be attached without spending live calls:
@@ -119,7 +139,7 @@ Fixtures are useful when an OpenAI run happened outside ORP, when you are compar
119
139
 
120
140
  ORP uses the Responses API for these lanes. Useful knobs in profile JSON:
121
141
 
122
- - `model`: for example `gpt-5.4` or `o3-deep-research-2025-06-26`.
142
+ - `model`: for example `gpt-5.5`.
123
143
  - `call_moment`: the named research-loop moment when this lane may resolve a key.
124
144
  - `reasoning_effort`: `none`, `low`, `medium`, `high`, or `xhigh` for supported models.
125
145
  - `reasoning_summary`: `auto` or `detailed` for Deep Research reasoning summaries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "open-research-protocol",
3
- "version": "0.4.30",
3
+ "version": "0.4.32",
4
4
  "description": "ORP CLI (Open Research Protocol): workspace ledgers, secrets, scheduling, governed execution, and agent-friendly research workflows.",
5
5
  "license": "MIT",
6
6
  "author": "Fractal Research Group <cody@frg.earth>",
@@ -122,9 +122,9 @@ function buildCodexActivityIndex(tabs = [], options = {}) {
122
122
  return activityBySessionId;
123
123
  }
124
124
 
125
- function orderTabsByRecentActivity(tabs = [], options = {}) {
125
+ function buildRankedTabs(tabs = [], options = {}) {
126
126
  const activityBySessionId = buildCodexActivityIndex(tabs, options);
127
- const rankedTabs = tabs.map((tab, originalIndex) => {
127
+ return tabs.map((tab, originalIndex) => {
128
128
  const sessionActivity =
129
129
  tab.resumeTool === "codex" && tab.sessionId ? activityBySessionId.get(String(tab.sessionId).toLowerCase()) : null;
130
130
  return {
@@ -133,24 +133,40 @@ function orderTabsByRecentActivity(tabs = [], options = {}) {
133
133
  activityMs: sessionActivity?.mtimeMs || 0,
134
134
  };
135
135
  });
136
+ }
137
+
138
+ function orderTabsByRecentActivity(tabs = [], options = {}) {
139
+ const rankedTabs = buildRankedTabs(tabs, options);
140
+ const projects = new Map();
136
141
 
137
- const projectActivity = new Map();
138
142
  for (const ranked of rankedTabs) {
139
- const current = projectActivity.get(ranked.tab.path) || 0;
140
- projectActivity.set(ranked.tab.path, Math.max(current, ranked.activityMs));
143
+ const projectPath = ranked.tab.path;
144
+ if (!projects.has(projectPath)) {
145
+ projects.set(projectPath, {
146
+ projectPath,
147
+ firstIndex: ranked.originalIndex,
148
+ activityMs: ranked.activityMs,
149
+ tabs: [],
150
+ });
151
+ }
152
+
153
+ const project = projects.get(projectPath);
154
+ project.firstIndex = Math.min(project.firstIndex, ranked.originalIndex);
155
+ project.activityMs = Math.max(project.activityMs, ranked.activityMs);
156
+ project.tabs.push(ranked);
141
157
  }
142
158
 
143
- return rankedTabs
144
- .sort((left, right) => {
145
- const leftProjectActivity = projectActivity.get(left.tab.path) || 0;
146
- const rightProjectActivity = projectActivity.get(right.tab.path) || 0;
147
- return (
148
- rightProjectActivity - leftProjectActivity ||
159
+ return [...projects.values()]
160
+ .sort(
161
+ (left, right) =>
149
162
  right.activityMs - left.activityMs ||
150
- left.originalIndex - right.originalIndex
151
- );
152
- })
153
- .map((ranked) => ranked.tab);
163
+ left.firstIndex - right.firstIndex,
164
+ )
165
+ .flatMap((project) =>
166
+ project.tabs
167
+ .sort((left, right) => right.activityMs - left.activityMs || left.originalIndex - right.originalIndex)
168
+ .map((ranked) => ranked.tab),
169
+ );
154
170
  }
155
171
 
156
172
  export function parseWorkspaceTabsArgs(argv = []) {
@@ -74,12 +74,12 @@ test("buildWorkspaceCommandsReport exposes direct restart commands and exact sav
74
74
  assert.equal(report.commandCount, 3);
75
75
  assert.equal(report.tabs[0]?.resumeCommand, "codex resume abc-123");
76
76
  assert.equal(report.tabs[0]?.restartCommand, "cd '/Volumes/Code_2TB/code/collaboration' && codex resume abc-123");
77
- assert.equal(report.tabs[1]?.resumeCommand, "claude resume claude-456");
77
+ assert.equal(report.tabs[1]?.restartCommand, "cd '/Volumes/Code_2TB/code/collaboration'");
78
+ assert.equal(report.tabs[2]?.resumeCommand, "claude resume claude-456");
78
79
  assert.equal(
79
- report.tabs[1]?.restartCommand,
80
+ report.tabs[2]?.restartCommand,
80
81
  "cd '/Volumes/Code_2TB/code/anthropic-lab' && claude resume claude-456",
81
82
  );
82
- assert.equal(report.tabs[2]?.restartCommand, "cd '/Volumes/Code_2TB/code/collaboration'");
83
83
  });
84
84
 
85
85
  test("runWorkspaceCommands prints JSON with copyable commands", async () => {
@@ -111,16 +111,16 @@ test("buildWorkspaceTabsReport keeps duplicate titles unique and exposes generic
111
111
  "cd '/Volumes/Code_2TB/code/collaboration' && codex resume abc-123",
112
112
  );
113
113
  assert.equal(report.tabs[0]?.codexSessionId, "abc-123");
114
- assert.equal(report.tabs[1]?.title, "anthropic-lab");
115
- assert.equal(report.tabs[1]?.resumeCommand, "claude resume claude-456");
116
- assert.equal(report.tabs[1]?.remoteBranch, "main");
114
+ assert.equal(report.tabs[1]?.title, "collaboration (2)");
115
+ assert.equal(report.tabs[1]?.codexSessionId, null);
116
+ assert.equal(report.tabs[2]?.title, "anthropic-lab");
117
+ assert.equal(report.tabs[2]?.resumeCommand, "claude resume claude-456");
118
+ assert.equal(report.tabs[2]?.remoteBranch, "main");
117
119
  assert.equal(
118
- report.tabs[1]?.restartCommand,
120
+ report.tabs[2]?.restartCommand,
119
121
  "cd '/Volumes/Code_2TB/code/anthropic-lab' && claude resume claude-456",
120
122
  );
121
- assert.equal(report.tabs[1]?.claudeSessionId, "claude-456");
122
- assert.equal(report.tabs[2]?.title, "collaboration (2)");
123
- assert.equal(report.tabs[2]?.codexSessionId, null);
123
+ assert.equal(report.tabs[2]?.claudeSessionId, "claude-456");
124
124
  });
125
125
 
126
126
  test("buildWorkspaceTabsReport ranks Codex tabs by recent local session activity", async () => {
@@ -183,6 +183,143 @@ test("buildWorkspaceTabsReport ranks Codex tabs by recent local session activity
183
183
  assert.equal(report.projects[2]?.path, "/Volumes/Code_2TB/code/no-session-project");
184
184
  });
185
185
 
186
+ test("buildWorkspaceTabsReport ranks tracked Codex sessions by update time, not rollout creation time", async () => {
187
+ const tempDir = await makeTempDir();
188
+ const codexHome = path.join(tempDir, "codex-home");
189
+ const sessionsDir = path.join(codexHome, "sessions", "2026", "04", "15");
190
+ await fs.mkdir(sessionsDir, { recursive: true });
191
+
192
+ const oldRolloutUpdatedSessionId = "019d0000-0000-7000-8000-000000000021";
193
+ const newRolloutStaleSessionId = "019d0000-0000-7000-8000-000000000022";
194
+ const untrackedFreshSessionId = "019d0000-0000-7000-8000-000000000023";
195
+ const oldRolloutUpdatedPath = path.join(
196
+ sessionsDir,
197
+ `rollout-2026-04-15T01-00-00-${oldRolloutUpdatedSessionId}.jsonl`,
198
+ );
199
+ const newRolloutStalePath = path.join(
200
+ sessionsDir,
201
+ `rollout-2026-04-15T09-00-00-${newRolloutStaleSessionId}.jsonl`,
202
+ );
203
+ const untrackedFreshPath = path.join(sessionsDir, `rollout-2026-04-15T10-00-00-${untrackedFreshSessionId}.jsonl`);
204
+
205
+ await fs.writeFile(oldRolloutUpdatedPath, "{}\n", "utf8");
206
+ await fs.writeFile(newRolloutStalePath, "{}\n", "utf8");
207
+ await fs.writeFile(untrackedFreshPath, "{}\n", "utf8");
208
+ await fs.utimes(oldRolloutUpdatedPath, new Date("2026-04-15T11:00:00Z"), new Date("2026-04-15T11:00:00Z"));
209
+ await fs.utimes(newRolloutStalePath, new Date("2026-04-15T09:00:00Z"), new Date("2026-04-15T09:00:00Z"));
210
+ await fs.utimes(untrackedFreshPath, new Date("2026-04-15T12:00:00Z"), new Date("2026-04-15T12:00:00Z"));
211
+
212
+ const parsed = parseWorkspaceSource({
213
+ sourceType: "workspace-file",
214
+ sourceLabel: "/tmp/workspace.json",
215
+ title: "workspace",
216
+ workspaceManifest: {
217
+ version: "1",
218
+ workspaceId: "orp-main",
219
+ tabs: [
220
+ {
221
+ title: "new-rollout-stale",
222
+ path: "/Volumes/Code_2TB/code/new-rollout-stale",
223
+ resumeCommand: `codex resume ${newRolloutStaleSessionId}`,
224
+ },
225
+ {
226
+ title: "old-rollout-updated",
227
+ path: "/Volumes/Code_2TB/code/old-rollout-updated",
228
+ resumeCommand: `codex resume ${oldRolloutUpdatedSessionId}`,
229
+ },
230
+ ],
231
+ },
232
+ notes: "",
233
+ });
234
+
235
+ const report = buildWorkspaceTabsReport(
236
+ {
237
+ sourceType: "workspace-file",
238
+ sourceLabel: "/tmp/workspace.json",
239
+ title: "workspace",
240
+ },
241
+ parsed,
242
+ { codexHome },
243
+ );
244
+
245
+ assert.deepEqual(
246
+ report.tabs.map((tab) => tab.title),
247
+ ["old-rollout-updated", "new-rollout-stale"],
248
+ );
249
+ });
250
+
251
+ test("buildWorkspaceTabsReport bubbles a project when one attached Codex session is freshest", async () => {
252
+ const tempDir = await makeTempDir();
253
+ const codexHome = path.join(tempDir, "codex-home");
254
+ const sessionsDir = path.join(codexHome, "sessions", "2026", "04", "16");
255
+ await fs.mkdir(sessionsDir, { recursive: true });
256
+
257
+ const projectAOlderSessionId = "019d0000-0000-7000-8000-000000000011";
258
+ const projectANewerSessionId = "019d0000-0000-7000-8000-000000000012";
259
+ const projectBSessionId = "019d0000-0000-7000-8000-000000000013";
260
+
261
+ const projectAOlderPath = path.join(sessionsDir, `rollout-2026-04-16T01-00-00-${projectAOlderSessionId}.jsonl`);
262
+ const projectANewerPath = path.join(sessionsDir, `rollout-2026-04-16T03-00-00-${projectANewerSessionId}.jsonl`);
263
+ const projectBPath = path.join(sessionsDir, `rollout-2026-04-16T02-00-00-${projectBSessionId}.jsonl`);
264
+
265
+ await fs.writeFile(projectAOlderPath, "{}\n", "utf8");
266
+ await fs.writeFile(projectANewerPath, "{}\n", "utf8");
267
+ await fs.writeFile(projectBPath, "{}\n", "utf8");
268
+ await fs.utimes(projectAOlderPath, new Date("2026-04-16T01:00:00Z"), new Date("2026-04-16T01:00:00Z"));
269
+ await fs.utimes(projectANewerPath, new Date("2026-04-16T03:00:00Z"), new Date("2026-04-16T03:00:00Z"));
270
+ await fs.utimes(projectBPath, new Date("2026-04-16T02:00:00Z"), new Date("2026-04-16T02:00:00Z"));
271
+
272
+ const parsed = parseWorkspaceSource({
273
+ sourceType: "workspace-file",
274
+ sourceLabel: "/tmp/workspace.json",
275
+ title: "workspace",
276
+ workspaceManifest: {
277
+ version: "1",
278
+ workspaceId: "orp-main",
279
+ tabs: [
280
+ {
281
+ title: "project-b",
282
+ path: "/Volumes/Code_2TB/code/project-b",
283
+ resumeCommand: `codex resume ${projectBSessionId}`,
284
+ },
285
+ {
286
+ title: "project-a-old",
287
+ path: "/Volumes/Code_2TB/code/project-a",
288
+ resumeCommand: `codex resume ${projectAOlderSessionId}`,
289
+ },
290
+ {
291
+ title: "project-a-new",
292
+ path: "/Volumes/Code_2TB/code/project-a",
293
+ resumeCommand: `codex resume ${projectANewerSessionId}`,
294
+ },
295
+ ],
296
+ },
297
+ notes: "",
298
+ });
299
+
300
+ const report = buildWorkspaceTabsReport(
301
+ {
302
+ sourceType: "workspace-file",
303
+ sourceLabel: "/tmp/workspace.json",
304
+ title: "workspace",
305
+ },
306
+ parsed,
307
+ { codexHome },
308
+ );
309
+
310
+ assert.deepEqual(
311
+ report.tabs.map((tab) => tab.title),
312
+ ["project-a-new", "project-a-old", "project-b"],
313
+ );
314
+ assert.equal(report.projects[0]?.path, "/Volumes/Code_2TB/code/project-a");
315
+ assert.equal(report.projects[0]?.sessionCount, 2);
316
+ assert.deepEqual(
317
+ report.projects[0]?.sessions.map((session) => session.title),
318
+ ["project-a-new", "project-a-old"],
319
+ );
320
+ assert.equal(report.projects[1]?.path, "/Volumes/Code_2TB/code/project-b");
321
+ });
322
+
186
323
  test("runWorkspaceTabs prints JSON without launch commands", async () => {
187
324
  const tempDir = await makeTempDir();
188
325
  const manifestPath = path.join(tempDir, "workspace.json");
Binary file