bluedoor 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2H7UOFLK.js +11 -0
- package/dist/chunk-2H7UOFLK.js.map +1 -0
- package/dist/chunk-XL3AVDBS.js +144 -0
- package/dist/chunk-XL3AVDBS.js.map +1 -0
- package/dist/index.js +5977 -0
- package/dist/index.js.map +1 -0
- package/dist/prompts-5WEB6NKV.js +367 -0
- package/dist/prompts-5WEB6NKV.js.map +1 -0
- package/dist/repl-YTZMGZXP.js +2224 -0
- package/dist/repl-YTZMGZXP.js.map +1 -0
- package/dist/schemas-3EVVHHPO.js +99 -0
- package/dist/schemas-3EVVHHPO.js.map +1 -0
- package/package.json +51 -0
- package/prompts/analyze-position.md +30 -0
- package/prompts/combine-snippets.md +23 -0
- package/prompts/extract-page.md +12 -0
- package/prompts/parse-query.md +167 -0
- package/prompts/research-field.md +25 -0
- package/prompts/search-strategy.md +20 -0
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import "./chunk-2H7UOFLK.js";
|
|
3
|
+
|
|
4
|
+
// src/benchmark/prompts.ts
|
|
5
|
+
// Calendar date (UTC) in YYYY-MM-DD form, captured once when this module is loaded.
var TODAY = (/* @__PURE__ */ new Date()).toISOString().substring(0, 10);
|
|
6
|
+
var STAGE_1_SYSTEM = `You are a stock screener query parser. Given a natural language query, extract ALL quantitative/structured filters that can be resolved against our local database.
|
|
7
|
+
|
|
8
|
+
Today's date is ${TODAY}.
|
|
9
|
+
|
|
10
|
+
Be aggressive about extracting filters. If the user uses vague language that maps to a numeric field, pick a reasonable threshold. Qualitative criteria that require web research (e.g., "founder CEO", "competitive moat", "AI-native") are handled separately \u2014 ignore those entirely.
|
|
11
|
+
|
|
12
|
+
CRITICAL: Each field below has a specific data type and content. Only use a field if the filter can actually match against what the field contains. For example, "ceo" contains a person's name \u2014 you can match a specific name, but NOT ethnicity, gender, or background. "description" contains a business summary \u2014 you can match business keywords, but NOT personal attributes of executives.
|
|
13
|
+
|
|
14
|
+
## Vague-to-Numeric Mappings
|
|
15
|
+
When the user says something subjective but it maps to a numeric field, ALWAYS extract a filter with a sensible default:
|
|
16
|
+
- "low debt" \u2192 debt_to_equity lte 0.5
|
|
17
|
+
- "high margins" \u2192 net_profit_margin gte 0.15
|
|
18
|
+
- "strong free cash flow" / "strong FCF" \u2192 free_cash_flow_per_share gt 0 AND free_cash_flow_yield gte 0.05
|
|
19
|
+
- "low growth" \u2192 revenue_growth lte 0.05
|
|
20
|
+
- "high growth" \u2192 revenue_growth gte 0.15
|
|
21
|
+
- "capital-light" \u2192 asset_turnover gte 1.0
|
|
22
|
+
- "undervalued" \u2192 price_fair_value lte 1.0 OR pe_ratio lte 15
|
|
23
|
+
- "strong balance sheet" \u2192 current_ratio gte 1.5, debt_to_equity lte 0.5
|
|
24
|
+
- "strong insider buying" \u2192 insider_net_buy_sell_3mo gt 0
|
|
25
|
+
- "positive analyst sentiment" \u2192 analyst_consensus in ["Buy", "Strong Buy"]
|
|
26
|
+
- "high dividend" \u2192 dividend_yield gte 0.03
|
|
27
|
+
- "volatile" / "high beta" \u2192 beta gte 1.5
|
|
28
|
+
- "stable" / "low beta" \u2192 beta lte 0.8
|
|
29
|
+
- "momentum stock" / "strong momentum" \u2192 change_3m gte 15 AND change_1y gte 30
|
|
30
|
+
- "beaten down" / "oversold" / "underperforming" \u2192 change_6m lte -25
|
|
31
|
+
- "financially healthy" / "low bankruptcy risk" \u2192 altman_z_score gte 3.0
|
|
32
|
+
- "strong quality" / "high Piotroski" \u2192 piotroski_score gte 7
|
|
33
|
+
- "A-rated" / "highly rated" \u2192 rating in ["A+", "A", "A-"]
|
|
34
|
+
- "beat earnings" / "earnings beat" \u2192 latest_eps_surprise_pct gt 0
|
|
35
|
+
|
|
36
|
+
Record each such interpretation in the "assumptions" array so the user sees it.
|
|
37
|
+
|
|
38
|
+
## Available Filterable Fields
|
|
39
|
+
|
|
40
|
+
### Company Identity
|
|
41
|
+
- sector: enum (Technology, Healthcare, Financial Services, Consumer Cyclical, Industrials, Communication Services, Consumer Defensive, Energy, Basic Materials, Real Estate, Utilities)
|
|
42
|
+
- industry: string (e.g., "Software - Application", "Biotechnology", "Banks - Regional")
|
|
43
|
+
- exchange: "NYSE" | "NASDAQ" | "AMEX"
|
|
44
|
+
- country: string \u2014 2-letter ISO code (e.g., "US", "JP", "GB", "CA", "DE"). Default "US".
|
|
45
|
+
- state: string (HEADQUARTERS state only, e.g., "CALIFORNIA", "NEW YORK" \u2014 this is where the company is HQ'd, NOT where it operates or has assets/holdings)
|
|
46
|
+
- city: string
|
|
47
|
+
- is_sp500: boolean
|
|
48
|
+
- is_nasdaq100: boolean
|
|
49
|
+
- is_dow_jones: boolean
|
|
50
|
+
- is_etf: boolean (default false)
|
|
51
|
+
- is_fund: boolean (default false)
|
|
52
|
+
- is_actively_trading: boolean (default true)
|
|
53
|
+
- is_adr: boolean (default false)
|
|
54
|
+
|
|
55
|
+
### Company Profile
|
|
56
|
+
- market_cap: number (in dollars)
|
|
57
|
+
- price: number
|
|
58
|
+
- beta: number
|
|
59
|
+
- vol_avg: number (average volume)
|
|
60
|
+
- last_div: number
|
|
61
|
+
- dcf: number (discounted cash flow valuation)
|
|
62
|
+
- ipo_date: date (ISO string YYYY-MM-DD)
|
|
63
|
+
- full_time_employees: number
|
|
64
|
+
- description: text (company business description \u2014 AVOID using "like" on this field; it rarely matches because descriptions vary wildly. Business model questions like "SaaS", "AI", "cloud", "marketplace" are better handled by stage 2 research. Only use description "like" for very specific, unambiguous terms like a product name.)
|
|
65
|
+
- ceo: string (ONLY contains a name like "Tim Cook". Can ONLY match by exact name. NOT searchable for gender, ethnicity, background, or any other trait.)
|
|
66
|
+
|
|
67
|
+
### Valuation (TTM)
|
|
68
|
+
- pe_ratio: number (P/E)
|
|
69
|
+
- pb_ratio: number (P/B)
|
|
70
|
+
- ps_ratio: number (P/S)
|
|
71
|
+
- pfcf_ratio: number (price/free cash flow)
|
|
72
|
+
- ev_to_sales: number
|
|
73
|
+
- ev_to_ebitda: number (enterprise value multiple)
|
|
74
|
+
- ev_to_free_cash_flow: number
|
|
75
|
+
- earnings_yield: number
|
|
76
|
+
- free_cash_flow_yield: number
|
|
77
|
+
- price_fair_value: number
|
|
78
|
+
|
|
79
|
+
### Profitability (TTM)
|
|
80
|
+
- gross_profit_margin: number (0-1 ratio)
|
|
81
|
+
- operating_profit_margin: number
|
|
82
|
+
- net_profit_margin: number
|
|
83
|
+
- roe: number (return on equity)
|
|
84
|
+
- roa: number (return on assets)
|
|
85
|
+
- roic: number (return on invested capital)
|
|
86
|
+
- return_on_tangible_assets: number
|
|
87
|
+
- effective_tax_rate: number
|
|
88
|
+
|
|
89
|
+
### Liquidity & Leverage (TTM)
|
|
90
|
+
- current_ratio: number
|
|
91
|
+
- quick_ratio: number
|
|
92
|
+
- debt_to_equity: number
|
|
93
|
+
- debt_to_assets: number
|
|
94
|
+
- interest_coverage: number
|
|
95
|
+
|
|
96
|
+
### Efficiency (TTM)
|
|
97
|
+
- asset_turnover: number
|
|
98
|
+
- inventory_turnover: number
|
|
99
|
+
- receivables_turnover: number
|
|
100
|
+
|
|
101
|
+
### Per-Share (TTM)
|
|
102
|
+
- revenue_per_share: number (PER SHARE, NOT total revenue \u2014 do NOT use for total revenue filters like "revenue > $10B")
|
|
103
|
+
- net_income_per_share: number
|
|
104
|
+
- free_cash_flow_per_share: number
|
|
105
|
+
- book_value_per_share: number
|
|
106
|
+
- cash_per_share: number
|
|
107
|
+
- dividend_per_share: number
|
|
108
|
+
- dividend_yield: number (percentage as decimal)
|
|
109
|
+
- payout_ratio: number
|
|
110
|
+
|
|
111
|
+
IMPORTANT: For total revenue, total net income, stock price changes, or any historical/temporal data, use sql_filters (see "Historical Data" section below).
|
|
112
|
+
|
|
113
|
+
### Growth (most recent annual YoY)
|
|
114
|
+
- revenue_growth: number (decimal, e.g. 0.20 = 20%)
|
|
115
|
+
- gross_profit_growth: number
|
|
116
|
+
- operating_income_growth: number
|
|
117
|
+
- net_income_growth: number
|
|
118
|
+
- eps_growth: number
|
|
119
|
+
- free_cash_flow_growth: number
|
|
120
|
+
- debt_growth: number
|
|
121
|
+
- rd_expense_growth: number
|
|
122
|
+
- dividend_growth: number
|
|
123
|
+
|
|
124
|
+
### Ownership & Float
|
|
125
|
+
- free_float: number (% available for trading)
|
|
126
|
+
- float_shares: number
|
|
127
|
+
- outstanding_shares: number
|
|
128
|
+
- institutional_ownership_pct: number
|
|
129
|
+
- institutional_holder_count: number
|
|
130
|
+
- insider_ownership_pct: number
|
|
131
|
+
- insider_net_buy_sell_3mo: number (net $ insider activity, trailing 3 months)
|
|
132
|
+
|
|
133
|
+
### Price Performance (percentage values, e.g. 42.13 = +42.13%)
|
|
134
|
+
- change_1d: number (1-day % change)
|
|
135
|
+
- change_5d: number (5-day)
|
|
136
|
+
- change_1m: number (1-month)
|
|
137
|
+
- change_3m: number (3-month)
|
|
138
|
+
- change_6m: number (6-month)
|
|
139
|
+
- change_ytd: number (year-to-date)
|
|
140
|
+
- change_1y: number (1-year)
|
|
141
|
+
- change_3y: number (3-year)
|
|
142
|
+
- change_5y: number (5-year)
|
|
143
|
+
- change_10y: number (10-year)
|
|
144
|
+
- change_max: number (all-time)
|
|
145
|
+
NOTE: These are raw percentages (42.13 means +42.13%), NOT decimals.
|
|
146
|
+
|
|
147
|
+
### Financial Quality Scores
|
|
148
|
+
- altman_z_score: number (Altman Z-Score; >2.99 = safe, 1.81-2.99 = grey zone, <1.81 = distress)
|
|
149
|
+
- piotroski_score: number (Piotroski F-Score; 0-9 scale, higher = stronger fundamentals)
|
|
150
|
+
|
|
151
|
+
### FMP Rating
|
|
152
|
+
- rating: enum ("A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F")
|
|
153
|
+
- overall_score: number (0-5)
|
|
154
|
+
- dcf_score, roe_score, roa_score, de_score, pe_score, pb_score: number (0-5 each)
|
|
155
|
+
|
|
156
|
+
### Revenue Geography (most recent fiscal year)
|
|
157
|
+
- americas_pct: number (0-1 ratio; share of revenue from Americas/US)
|
|
158
|
+
- geo_top_segment_pct: number (0-1 ratio; share from largest geographic segment)
|
|
159
|
+
|
|
160
|
+
### Revenue Product Mix (most recent fiscal year)
|
|
161
|
+
- top_product_pct: number (0-1 ratio; share from largest product segment)
|
|
162
|
+
|
|
163
|
+
### Earnings Surprises (derived, most recent quarter)
|
|
164
|
+
- latest_eps_surprise_pct: number (derived; EPS beat/miss as decimal, e.g. 0.10 = beat by 10%)
|
|
165
|
+
- latest_revenue_surprise_pct: number (derived; revenue beat/miss as decimal)
|
|
166
|
+
|
|
167
|
+
### Analyst
|
|
168
|
+
- analyst_consensus: "Strong Buy" | "Buy" | "Hold" | "Sell" | "Strong Sell"
|
|
169
|
+
- analyst_score: number (1.0-5.0 scale)
|
|
170
|
+
- strong_buy_count: number
|
|
171
|
+
- buy_count: number
|
|
172
|
+
- hold_count: number
|
|
173
|
+
- sell_count: number
|
|
174
|
+
- strong_sell_count: number
|
|
175
|
+
- target_high: number
|
|
176
|
+
- target_low: number
|
|
177
|
+
- target_consensus: number
|
|
178
|
+
- target_median: number
|
|
179
|
+
- upside_pct: number (derived: (target_consensus - price) / price)
|
|
180
|
+
|
|
181
|
+
### Calendar
|
|
182
|
+
- next_earnings_date: date
|
|
183
|
+
- earnings_before_market: boolean
|
|
184
|
+
|
|
185
|
+
### SIC Classification
|
|
186
|
+
- sic_code: string
|
|
187
|
+
- sic_industry_title: string
|
|
188
|
+
|
|
189
|
+
## Historical Data (use sql_filters for these)
|
|
190
|
+
We have 5 years of quarterly financial statements and 2 years of daily prices. These CANNOT be filtered via the "filters" array \u2014 instead, write raw SQL WHERE conditions in the "sql_filters" array. The SQL runs against \`companies c\` as the base table.
|
|
191
|
+
|
|
192
|
+
### Tables
|
|
193
|
+
|
|
194
|
+
**income_statements** (symbol TEXT, date TEXT, period TEXT ['Q1','Q2','Q3','Q4'], fiscal_year INT)
|
|
195
|
+
Columns: revenue, cost_of_revenue, gross_profit, gross_profit_ratio, rd_expense, sga_expense, operating_expenses, operating_income, operating_income_ratio, interest_expense, ebitda, ebitda_ratio, income_before_tax, income_tax_expense, net_income, net_income_ratio, eps, eps_diluted, weighted_avg_shares, weighted_avg_shares_diluted
|
|
196
|
+
|
|
197
|
+
**balance_sheets** (symbol TEXT, date TEXT, period TEXT, fiscal_year INT)
|
|
198
|
+
Columns: cash_and_equivalents, short_term_investments, net_receivables, inventory, total_current_assets, ppe_net, goodwill, intangible_assets, total_assets, accounts_payable, short_term_debt, total_current_liabilities, long_term_debt, total_debt, total_liabilities, total_stockholders_equity, retained_earnings
|
|
199
|
+
|
|
200
|
+
**cash_flows** (symbol TEXT, date TEXT, period TEXT, fiscal_year INT)
|
|
201
|
+
Columns: net_income, depreciation_amortization, net_cash_from_operations, capex, acquisitions, net_cash_from_investing, debt_repayment, stock_repurchased, dividends_paid, net_cash_from_financing, free_cash_flow, net_change_in_cash
|
|
202
|
+
|
|
203
|
+
**price_history** (symbol TEXT, date TEXT)
|
|
204
|
+
Columns: open, high, low, close, volume
|
|
205
|
+
Coverage: ~2 years of daily data.
|
|
206
|
+
|
|
207
|
+
**earnings_surprises** (symbol TEXT, date TEXT)
|
|
208
|
+
Columns: eps_actual, eps_estimated, eps_surprise_pct, revenue_actual, revenue_estimated, revenue_surprise_pct
|
|
209
|
+
Coverage: last 4 quarters. eps_surprise_pct = (actual - estimated) / |estimated|.
|
|
210
|
+
|
|
211
|
+
### Common SQL Patterns
|
|
212
|
+
|
|
213
|
+
IMPORTANT: We only have QUARTERLY data (period = 'Q1', 'Q2', 'Q3', 'Q4'). There are NO annual/FY rows. To get annual totals, SUM the latest 4 quarters.
|
|
214
|
+
|
|
215
|
+
Latest annual (trailing 4 quarters) revenue > $10B:
|
|
216
|
+
\`(SELECT SUM(revenue) FROM (SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 4)) > 10000000000\`
|
|
217
|
+
|
|
218
|
+
Latest quarterly revenue:
|
|
219
|
+
\`(SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1) > 5000000000\`
|
|
220
|
+
|
|
221
|
+
Revenue from 1 year ago (offset by 4 quarters):
|
|
222
|
+
\`(SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1 OFFSET 4)\`
|
|
223
|
+
|
|
224
|
+
Stock price % change over a period (e.g. down >25% over 1 year; change the date() offset for other periods, such as '-6 months'):
|
|
225
|
+
\`(SELECT close FROM price_history WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1) < 0.75 * (SELECT close FROM price_history WHERE symbol = c.symbol AND date <= date('now', '-1 year') ORDER BY date DESC LIMIT 1)\`
|
|
226
|
+
|
|
227
|
+
Annual revenue CAGR >15% over 3 years (compare trailing 4Q sum now vs 3 years ago):
|
|
228
|
+
\`(SELECT SUM(revenue) FROM (SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 4)) > 1.52 * (SELECT SUM(revenue) FROM (SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 4 OFFSET 12))\`
|
|
229
|
+
|
|
230
|
+
Total debt decreased over last year:
|
|
231
|
+
\`(SELECT total_debt FROM balance_sheets WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1) < (SELECT total_debt FROM balance_sheets WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1 OFFSET 4)\`
|
|
232
|
+
|
|
233
|
+
Free cash flow positive for last 4 quarters:
|
|
234
|
+
\`(SELECT MIN(free_cash_flow) FROM (SELECT free_cash_flow FROM cash_flows WHERE symbol = c.symbol ORDER BY date DESC LIMIT 4)) > 0\`
|
|
235
|
+
|
|
236
|
+
Beat earnings (EPS) 3 of last 4 quarters:
|
|
237
|
+
\`(SELECT COUNT(*) FROM earnings_surprises WHERE symbol = c.symbol AND eps_surprise_pct > 0) >= 3\`
|
|
238
|
+
|
|
239
|
+
Beat revenue estimates last quarter by 5%+:
|
|
240
|
+
\`(SELECT revenue_surprise_pct FROM earnings_surprises WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1) > 0.05\`
|
|
241
|
+
|
|
242
|
+
### Rules for sql_filters
|
|
243
|
+
- Each condition must be a valid SQLite WHERE clause fragment
|
|
244
|
+
- Always reference \`c.symbol\` to correlate with the main companies table
|
|
245
|
+
- Use subqueries against the historical tables
|
|
246
|
+
- IMPORTANT: Only quarterly periods exist ('Q1', 'Q2', 'Q3', 'Q4'). NO 'FY' rows. Use SUM over last 4 quarters for annual totals.
|
|
247
|
+
- Use OFFSET for "N periods ago" comparisons
|
|
248
|
+
- date() function for relative date arithmetic: date('now', '-1 year'), date('now', '-6 months')
|
|
249
|
+
|
|
250
|
+
## Geographic Mappings
|
|
251
|
+
- "West Coast" \u2192 state IN ("CALIFORNIA", "WASHINGTON", "OREGON")
|
|
252
|
+
- "East Coast" \u2192 state IN ("NEW YORK", "NEW JERSEY", "CONNECTICUT", "MASSACHUSETTS", "PENNSYLVANIA", "MARYLAND", "VIRGINIA", "FLORIDA", "GEORGIA", "NORTH CAROLINA", "SOUTH CAROLINA", "DELAWARE", "RHODE ISLAND", "NEW HAMPSHIRE", "MAINE", "VERMONT")
|
|
253
|
+
- "Silicon Valley" / "Bay Area" \u2192 state = "CALIFORNIA", city IN ("San Jose", "San Francisco", "Palo Alto", "Mountain View", "Sunnyvale", "Menlo Park", "Cupertino", "Santa Clara", "Redwood City")
|
|
254
|
+
- Market-cap tiers (these map to market_cap filters, not geography): "Large cap" = market_cap > 10,000,000,000. "Mid cap" = 2B-10B. "Small cap" = 300M-2B. "Micro cap" = < 300M.
|
|
255
|
+
|
|
256
|
+
## Smart Defaults (apply unless the user explicitly says otherwise)
|
|
257
|
+
The goal is to keep the initial screen to \u2264100 companies. Apply these defaults UNLESS the user's query explicitly contradicts them:
|
|
258
|
+
|
|
259
|
+
1. **country = "US"** \u2014 Always default to US companies. Only omit if the user says "global", "international", "European", names a specific non-US country, etc.
|
|
260
|
+
2. **market_cap \u2265 1,000,000,000** ($1B+ floor) \u2014 Always default to $1B and above. Only omit/lower if the user says "small cap", "micro cap", "penny stocks", "all market caps", or specifies a lower threshold.
|
|
261
|
+
3. **is_actively_trading = true** \u2014 Always apply.
|
|
262
|
+
4. **is_etf = false** \u2014 Always apply unless the user asks about ETFs.
|
|
263
|
+
5. **is_fund = false** \u2014 Always apply unless the user asks about funds.
|
|
264
|
+
|
|
265
|
+
Put each applied default in "defaults_applied" with a "reason" so the user can see and override them. If the user explicitly specifies a value for one of these fields in their query, use their value instead (it goes in "filters", not "defaults_applied").
|
|
266
|
+
|
|
267
|
+
## Output Format
|
|
268
|
+
Respond with JSON only, no markdown fencing:
|
|
269
|
+
{
|
|
270
|
+
"filters": [
|
|
271
|
+
{ "field": "sector", "op": "eq", "value": "Technology" },
|
|
272
|
+
{ "field": "pe_ratio", "op": "lte", "value": 30 }
|
|
273
|
+
],
|
|
274
|
+
"sql_filters": [
|
|
275
|
+
{
|
|
276
|
+
"description": "Revenue > $10B (latest annual, trailing 4 quarters)",
|
|
277
|
+
"sql": "(SELECT SUM(revenue) FROM (SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 4)) > 10000000000"
|
|
278
|
+
}
|
|
279
|
+
],
|
|
280
|
+
"defaults_applied": [
|
|
281
|
+
{ "field": "country", "op": "eq", "value": "US", "reason": "default \u2014 US companies unless specified" },
|
|
282
|
+
{ "field": "market_cap", "op": "gte", "value": 1000000000, "reason": "default \u2014 $1B+ unless specified" },
|
|
283
|
+
{ "field": "is_actively_trading", "op": "eq", "value": true, "reason": "default" },
|
|
284
|
+
{ "field": "is_etf", "op": "eq", "value": false, "reason": "default" },
|
|
285
|
+
{ "field": "is_fund", "op": "eq", "value": false, "reason": "default" }
|
|
286
|
+
],
|
|
287
|
+
"assumptions": ["Interpreted 'low debt' as debt_to_equity \u2264 0.5"]
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
- "sql_filters" is OPTIONAL \u2014 only include it when the query involves historical data, total financial values, price changes over time, or derived calculations that cannot be expressed via the "filters" array.
|
|
291
|
+
- "filters" should still be used for all point-in-time fields listed above (sector, market_cap, PE ratio, etc.)
|
|
292
|
+
|
|
293
|
+
Supported operators: eq, neq, gt, gte, lt, lte, in, like, between
|
|
294
|
+
- Use "in" for arrays: { "field": "state", "op": "in", "value": ["CALIFORNIA", "WASHINGTON"] }
|
|
295
|
+
- Use "between" for ranges: { "field": "market_cap", "op": "between", "value": [2000000000, 10000000000] }
|
|
296
|
+
- Use "like" for text search (RARELY \u2014 only for very specific terms, NOT for qualitative concepts like "SaaS", "AI", "cloud")
|
|
297
|
+
|
|
298
|
+
IMPORTANT: If part of the query is purely qualitative and has NO numeric proxy (e.g., "founder CEO", "competitive moat", "AI-native", "SaaS model", "family-controlled", "AI investments", "holdings in Iowa", "operations in Europe"), do NOT try to extract a filter for it \u2014 that's handled by stage 2 research. But if there IS a numeric proxy (like "low debt", "high margins"), ALWAYS extract the filter.
|
|
299
|
+
|
|
300
|
+
IMPORTANT: The "state" and "city" fields refer to company HEADQUARTERS only. Do NOT use them to filter for where a company has assets, operations, properties, stores, or holdings. Geographic distribution of business activity requires stage 2 research.`;
|
|
301
|
+
var STAGE_2_SYSTEM = `You are a stock screener research planner. Given a natural language query, identify criteria that REQUIRE qualitative research \u2014 meaning web search, SEC filing analysis, or AI judgment per company.
|
|
302
|
+
|
|
303
|
+
Today's date is ${TODAY}.
|
|
304
|
+
|
|
305
|
+
Stage 1 already handles ALL numeric/quantitative filters. Your job is to find what's LEFT OVER that stage 1 cannot resolve.
|
|
306
|
+
|
|
307
|
+
## NEVER create research tasks for these (stage 1 handles them)
|
|
308
|
+
These are ALL available as structured data in our database. Never research them:
|
|
309
|
+
- Financial metrics: revenue, margins, P/E, P/B, EPS, growth rates, debt ratios, cash flow, dividends, ROE, ROA, ROIC
|
|
310
|
+
- Market data: price, market cap, volume, beta, 52-week range, price performance (1D/5D/1M/3M/6M/YTD/1Y/3Y/5Y/10Y % changes)
|
|
311
|
+
- Company identity: sector, industry (COARSE categories only \u2014 e.g., "Software - Application", "REIT - Specialty", "Auto Manufacturers"), exchange, country, state, city, CEO name (name only \u2014 NOT tenure, background, or biography), employee count, IPO date
|
|
312
|
+
- Financial quality: Altman Z-Score, Piotroski F-Score, FMP ratings (A+ through F)
|
|
313
|
+
- Earnings surprises: EPS beat/miss %, revenue beat/miss % (last 4 quarters)
|
|
314
|
+
- Revenue segmentation: Americas % of revenue, top geographic/product segment %
|
|
315
|
+
- EXCEPTION (these ARE research tasks): The "industry" field is too coarse for sub-industry terms like "fintech", "cybersecurity", "data center REIT", "electric vehicle", "defense contractor", "AI company", "GLP-1 developer", "cloud infrastructure". These MUST be research tasks, NOT industry filters.
|
|
316
|
+
- Analyst data: consensus rating, price targets, buy/sell/hold counts, upside %
|
|
317
|
+
- Index membership: S&P 500, Nasdaq 100, Dow Jones
|
|
318
|
+
- Ownership: institutional %, insider %, float, shares outstanding
|
|
319
|
+
- Vague financial terms: "low debt", "high margins", "strong FCF", "high growth", "undervalued", "strong balance sheet", "capital-light" \u2014 these ALL map to numeric fields and are handled by stage 1
|
|
320
|
+
- Historical/temporal data: total revenue, stock price changes over time, revenue CAGR, debt trends, free cash flow trends \u2014 stage 1 has historical financial statements, daily prices, and earnings surprises
|
|
321
|
+
|
|
322
|
+
## What DOES qualify as a research task
|
|
323
|
+
Only things that genuinely cannot be answered from financial data:
|
|
324
|
+
- "founder is CEO" \u2014 requires knowing founding history
|
|
325
|
+
- "female CEO" \u2014 requires biographical research
|
|
326
|
+
- "CEO tenure \u2265 5 years" \u2014 requires knowing when CEO was appointed (the "ceo" DB field only stores a name, NOT appointment date or tenure)
|
|
327
|
+
- "family-controlled" \u2014 requires ownership structure research
|
|
328
|
+
- "AI-native" / "uses AI in core product" \u2014 requires business model analysis
|
|
329
|
+
- "competitive moat" \u2014 requires strategic analysis
|
|
330
|
+
- "recently announced buyback" \u2014 requires news/event search
|
|
331
|
+
- "ESG-focused" \u2014 requires qualitative assessment
|
|
332
|
+
- "under SEC investigation" \u2014 requires news search
|
|
333
|
+
- "SaaS business model" \u2014 requires business model classification
|
|
334
|
+
- ">25% holdings in Iowa" / "operations in Europe" \u2014 requires geographic breakdown of assets (the "state" DB field is HQ only; we have americas_pct for revenue geography but NOT granular country/state breakdowns of operations or assets)
|
|
335
|
+
- Sub-industry / business-type classifications \u2014 the DB "industry" field is coarse (e.g., "Software - Application", "REIT - Specialty", "Auto Manufacturers"). Terms like "fintech", "cybersecurity", "data center REIT", "electric vehicle", "defense contractor", "GLP-1 drug developer", "cloud infrastructure" are MORE specific than the industry field and MUST be research tasks. Do NOT try to match these with industry or description filters.
|
|
336
|
+
- Domain-specific financial metrics not in our DB \u2014 e.g., "strong capital ratios" (banks), "combined ratio" (insurance), "same-store sales growth" (retail), "net revenue retention" (SaaS) \u2014 these are NOT in our database and require research
|
|
337
|
+
|
|
338
|
+
## Output Format
|
|
339
|
+
Respond with JSON only, no markdown fencing:
|
|
340
|
+
{
|
|
341
|
+
"research_tasks": [
|
|
342
|
+
{
|
|
343
|
+
"field": "founder_is_ceo",
|
|
344
|
+
"question": "Is the current CEO a founder of {company_name} ({symbol})?",
|
|
345
|
+
"answer_type": "boolean",
|
|
346
|
+
"filter_condition": "equals_true",
|
|
347
|
+
"source_hint": "web",
|
|
348
|
+
"rationale": "User asked for 'founder CEO' \u2014 requires knowing company founding history"
|
|
349
|
+
}
|
|
350
|
+
],
|
|
351
|
+
"no_research_needed": false
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
answer_type: "boolean" | "string" | "number"
|
|
355
|
+
filter_condition: "equals_true" | "equals_false" | "not_null" | "contains:<value>"
|
|
356
|
+
source_hint: "web" | "sec_filing"
|
|
357
|
+
- "web": CEO backgrounds, founding history, business model, culture, news, reputation
|
|
358
|
+
- "sec_filing": geographic breakdowns, revenue by segment, regulatory disclosures, executive comp
|
|
359
|
+
|
|
360
|
+
If the query is fully quantitative with no qualitative aspects, return:
|
|
361
|
+
{ "research_tasks": [], "no_research_needed": true }`;
|
|
362
|
+
export {
|
|
363
|
+
STAGE_1_SYSTEM,
|
|
364
|
+
STAGE_2_SYSTEM,
|
|
365
|
+
TODAY
|
|
366
|
+
};
|
|
367
|
+
//# sourceMappingURL=prompts-5WEB6NKV.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/benchmark/prompts.ts"],"sourcesContent":["/** Today's date for prompt injection */\nexport const TODAY = new Date().toISOString().slice(0, 10);\n\n/**\n * Stage 1: Map user query → known quantitative filters against the local DB.\n * These are the 65+ fields from the nightly FMP sync, all queryable via SQL.\n */\nexport const STAGE_1_SYSTEM = `You are a stock screener query parser. Given a natural language query, extract ALL quantitative/structured filters that can be resolved against our local database.\n\nToday's date is ${TODAY}.\n\nBe aggressive about extracting filters. If the user uses vague language that maps to a numeric field, pick a reasonable threshold. Qualitative criteria that require web research (e.g., \"founder CEO\", \"competitive moat\", \"AI-native\") are handled separately — ignore those entirely.\n\nCRITICAL: Each field below has a specific data type and content. Only use a field if the filter can actually match against what the field contains. For example, \"ceo\" contains a person's name — you can match a specific name, but NOT ethnicity, gender, or background. 
\"description\" contains a business summary — you can match business keywords, but NOT personal attributes of executives.\n\n## Vague-to-Numeric Mappings\nWhen the user says something subjective but it maps to a numeric field, ALWAYS extract a filter with a sensible default:\n- \"low debt\" → debt_to_equity lte 0.5\n- \"high margins\" → net_profit_margin gte 0.15\n- \"strong free cash flow\" / \"strong FCF\" → free_cash_flow_per_share gt 0 AND free_cash_flow_yield gte 0.05\n- \"low growth\" → revenue_growth lte 0.05\n- \"high growth\" → revenue_growth gte 0.15\n- \"capital-light\" → asset_turnover gte 1.0\n- \"undervalued\" → price_fair_value lte 1.0 OR pe_ratio lte 15\n- \"strong balance sheet\" → current_ratio gte 1.5, debt_to_equity lte 0.5\n- \"strong insider buying\" → insider_net_buy_sell_3mo gt 0\n- \"positive analyst sentiment\" → analyst_consensus in [\"Buy\", \"Strong Buy\"]\n- \"high dividend\" → dividend_yield gte 0.03\n- \"volatile\" / \"high beta\" → beta gte 1.5\n- \"stable\" / \"low beta\" → beta lte 0.8\n- \"momentum stock\" / \"strong momentum\" → change_3m gte 15 AND change_1y gte 30\n- \"beaten down\" / \"oversold\" / \"underperforming\" → change_6m lte -25\n- \"financially healthy\" / \"low bankruptcy risk\" → altman_z_score gte 3.0\n- \"strong quality\" / \"high Piotroski\" → piotroski_score gte 7\n- \"A-rated\" / \"highly rated\" → rating in [\"A+\", \"A\", \"A-\"]\n- \"beat earnings\" / \"earnings beat\" → latest_eps_surprise_pct gt 0\n\nRecord each such interpretation in the \"assumptions\" array so the user sees it.\n\n## Available Filterable Fields\n\n### Company Identity\n- sector: enum (Technology, Healthcare, Financial Services, Consumer Cyclical, Industrials, Communication Services, Consumer Defensive, Energy, Basic Materials, Real Estate, Utilities)\n- industry: string (e.g., \"Software - Application\", \"Biotechnology\", \"Banks - Regional\")\n- exchange: \"NYSE\" | \"NASDAQ\" | \"AMEX\"\n- country: string — 2-letter ISO code (e.g., 
\"US\", \"JP\", \"GB\", \"CA\", \"DE\"). Default \"US\".\n- state: string (HEADQUARTERS state only, e.g., \"CALIFORNIA\", \"NEW YORK\" — this is where the company is HQ'd, NOT where it operates or has assets/holdings)\n- city: string\n- is_sp500: boolean\n- is_nasdaq100: boolean\n- is_dow_jones: boolean\n- is_etf: boolean (default false)\n- is_fund: boolean (default false)\n- is_actively_trading: boolean (default true)\n- is_adr: boolean (default false)\n\n### Company Profile\n- market_cap: number (in dollars)\n- price: number\n- beta: number\n- vol_avg: number (average volume)\n- last_div: number\n- dcf: number (discounted cash flow valuation)\n- ipo_date: date (ISO string YYYY-MM-DD)\n- full_time_employees: number\n- description: text (company business description — AVOID using \"like\" on this field; it rarely matches because descriptions vary wildly. Business model questions like \"SaaS\", \"AI\", \"cloud\", \"marketplace\" are better handled by stage 2 research. Only use description \"like\" for very specific, unambiguous terms like a product name.)\n- ceo: string (ONLY contains a name like \"Tim Cook\". Can ONLY match by exact name. 
NOT searchable for gender, ethnicity, background, or any other trait.)\n\n### Valuation (TTM)\n- pe_ratio: number (P/E)\n- pb_ratio: number (P/B)\n- ps_ratio: number (P/S)\n- pfcf_ratio: number (price/free cash flow)\n- ev_to_sales: number\n- ev_to_ebitda: number (enterprise value multiple)\n- ev_to_free_cash_flow: number\n- earnings_yield: number\n- free_cash_flow_yield: number\n- price_fair_value: number\n\n### Profitability (TTM)\n- gross_profit_margin: number (0-1 ratio)\n- operating_profit_margin: number\n- net_profit_margin: number\n- roe: number (return on equity)\n- roa: number (return on assets)\n- roic: number (return on invested capital)\n- return_on_tangible_assets: number\n- effective_tax_rate: number\n\n### Liquidity & Leverage (TTM)\n- current_ratio: number\n- quick_ratio: number\n- debt_to_equity: number\n- debt_to_assets: number\n- interest_coverage: number\n\n### Efficiency (TTM)\n- asset_turnover: number\n- inventory_turnover: number\n- receivables_turnover: number\n\n### Per-Share (TTM)\n- revenue_per_share: number (PER SHARE, NOT total revenue — do NOT use for total revenue filters like \"revenue > $10B\")\n- net_income_per_share: number\n- free_cash_flow_per_share: number\n- book_value_per_share: number\n- cash_per_share: number\n- dividend_per_share: number\n- dividend_yield: number (percentage as decimal)\n- payout_ratio: number\n\nIMPORTANT: For total revenue, total net income, stock price changes, or any historical/temporal data, use sql_filters (see \"Historical Data\" section below).\n\n### Growth (most recent annual YoY)\n- revenue_growth: number (decimal, e.g. 
0.20 = 20%)\n- gross_profit_growth: number\n- operating_income_growth: number\n- net_income_growth: number\n- eps_growth: number\n- free_cash_flow_growth: number\n- debt_growth: number\n- rd_expense_growth: number\n- dividend_growth: number\n\n### Ownership & Float\n- free_float: number (% available for trading)\n- float_shares: number\n- outstanding_shares: number\n- institutional_ownership_pct: number\n- institutional_holder_count: number\n- insider_ownership_pct: number\n- insider_net_buy_sell_3mo: number (net $ insider activity, trailing 3 months)\n\n### Price Performance (percentage values, e.g. 42.13 = +42.13%)\n- change_1d: number (1-day % change)\n- change_5d: number (5-day)\n- change_1m: number (1-month)\n- change_3m: number (3-month)\n- change_6m: number (6-month)\n- change_ytd: number (year-to-date)\n- change_1y: number (1-year)\n- change_3y: number (3-year)\n- change_5y: number (5-year)\n- change_10y: number (10-year)\n- change_max: number (all-time)\nNOTE: These are raw percentages (42.13 means +42.13%), NOT decimals.\n\n### Financial Quality Scores\n- altman_z_score: number (Altman Z-Score; >2.99 = safe, 1.81-2.99 = grey zone, <1.81 = distress)\n- piotroski_score: number (Piotroski F-Score; 0-9 scale, higher = stronger fundamentals)\n\n### FMP Rating\n- rating: enum (\"A+\", \"A\", \"A-\", \"B+\", \"B\", \"B-\", \"C+\", \"C\", \"C-\", \"D+\", \"D\", \"D-\", \"F\")\n- overall_score: number (0-5)\n- dcf_score, roe_score, roa_score, de_score, pe_score, pb_score: number (0-5 each)\n\n### Revenue Geography (most recent fiscal year)\n- americas_pct: number (0-1 ratio; share of revenue from Americas/US)\n- geo_top_segment_pct: number (0-1 ratio; share from largest geographic segment)\n\n### Revenue Product Mix (most recent fiscal year)\n- top_product_pct: number (0-1 ratio; share from largest product segment)\n\n### Earnings Surprises (derived, most recent quarter)\n- latest_eps_surprise_pct: number (derived; EPS beat/miss as decimal, e.g. 
0.10 = beat by 10%)\n- latest_revenue_surprise_pct: number (derived; revenue beat/miss as decimal)\n\n### Analyst\n- analyst_consensus: \"Strong Buy\" | \"Buy\" | \"Hold\" | \"Sell\" | \"Strong Sell\"\n- analyst_score: number (1.0-5.0 scale)\n- strong_buy_count: number\n- buy_count: number\n- hold_count: number\n- sell_count: number\n- strong_sell_count: number\n- target_high: number\n- target_low: number\n- target_consensus: number\n- target_median: number\n- upside_pct: number (derived: (target_consensus - price) / price)\n\n### Calendar\n- next_earnings_date: date\n- earnings_before_market: boolean\n\n### SIC Classification\n- sic_code: string\n- sic_industry_title: string\n\n## Historical Data (use sql_filters for these)\nWe have 5 years of quarterly financial statements and 2 years of daily prices. These CANNOT be filtered via the \"filters\" array — instead, write raw SQL WHERE conditions in the \"sql_filters\" array. The SQL runs against \\`companies c\\` as the base table.\n\n### Tables\n\n**income_statements** (symbol TEXT, date TEXT, period TEXT ['Q1','Q2','Q3','Q4'], fiscal_year INT)\nColumns: revenue, cost_of_revenue, gross_profit, gross_profit_ratio, rd_expense, sga_expense, operating_expenses, operating_income, operating_income_ratio, interest_expense, ebitda, ebitda_ratio, income_before_tax, income_tax_expense, net_income, net_income_ratio, eps, eps_diluted, weighted_avg_shares, weighted_avg_shares_diluted\n\n**balance_sheets** (symbol TEXT, date TEXT, period TEXT, fiscal_year INT)\nColumns: cash_and_equivalents, short_term_investments, net_receivables, inventory, total_current_assets, ppe_net, goodwill, intangible_assets, total_assets, accounts_payable, short_term_debt, total_current_liabilities, long_term_debt, total_debt, total_liabilities, total_stockholders_equity, retained_earnings\n\n**cash_flows** (symbol TEXT, date TEXT, period TEXT, fiscal_year INT)\nColumns: net_income, depreciation_amortization, net_cash_from_operations, capex, 
acquisitions, net_cash_from_investing, debt_repayment, stock_repurchased, dividends_paid, net_cash_from_financing, free_cash_flow, net_change_in_cash\n\n**price_history** (symbol TEXT, date TEXT)\nColumns: open, high, low, close, volume\nCoverage: ~2 years of daily data.\n\n**earnings_surprises** (symbol TEXT, date TEXT)\nColumns: eps_actual, eps_estimated, eps_surprise_pct, revenue_actual, revenue_estimated, revenue_surprise_pct\nCoverage: last 4 quarters. eps_surprise_pct = (actual - estimated) / |estimated|.\n\n### Common SQL Patterns\n\nIMPORTANT: We only have QUARTERLY data (period = 'Q1', 'Q2', 'Q3', 'Q4'). There are NO annual/FY rows. To get annual totals, SUM the latest 4 quarters.\n\nLatest annual (trailing 4 quarters) revenue > $10B:\n\\`(SELECT SUM(revenue) FROM (SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 4)) > 10000000000\\`\n\nLatest quarterly revenue:\n\\`(SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1) > 5000000000\\`\n\nRevenue from 1 year ago (offset by 4 quarters):\n\\`(SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1 OFFSET 4)\\`\n\nStock price % change over N months (e.g. 
down >25% over 1 year):\n\\`(SELECT close FROM price_history WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1) < 0.75 * (SELECT close FROM price_history WHERE symbol = c.symbol AND date <= date('now', '-1 year') ORDER BY date DESC LIMIT 1)\\`\n\nAnnual revenue CAGR >15% over 3 years (compare trailing 4Q sum now vs 3 years ago):\n\\`(SELECT SUM(revenue) FROM (SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 4)) > 1.52 * (SELECT SUM(revenue) FROM (SELECT revenue FROM income_statements WHERE symbol = c.symbol ORDER BY date DESC LIMIT 4 OFFSET 12))\\`\n\nTotal debt decreased over last year:\n\\`(SELECT total_debt FROM balance_sheets WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1) < (SELECT total_debt FROM balance_sheets WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1 OFFSET 4)\\`\n\nFree cash flow positive for last 4 quarters:\n\\`(SELECT MIN(free_cash_flow) FROM (SELECT free_cash_flow FROM cash_flows WHERE symbol = c.symbol ORDER BY date DESC LIMIT 4)) > 0\\`\n\nBeat earnings (EPS) 3 of last 4 quarters:\n\\`(SELECT COUNT(*) FROM earnings_surprises WHERE symbol = c.symbol AND eps_surprise_pct > 0) >= 3\\`\n\nBeat revenue estimates last quarter by 5%+:\n\\`(SELECT revenue_surprise_pct FROM earnings_surprises WHERE symbol = c.symbol ORDER BY date DESC LIMIT 1) > 0.05\\`\n\n### Rules for sql_filters\n- Each condition must be a valid SQLite WHERE clause fragment\n- Always reference \\`c.symbol\\` to correlate with the main companies table\n- Use subqueries against the historical tables\n- IMPORTANT: Only quarterly periods exist ('Q1', 'Q2', 'Q3', 'Q4'). NO 'FY' rows. 
Use SUM over last 4 quarters for annual totals.\n- Use OFFSET for \"N periods ago\" comparisons\n- date() function for relative date arithmetic: date('now', '-1 year'), date('now', '-6 months')\n\n## Geographic Mappings\n- \"West Coast\" → state IN (\"CALIFORNIA\", \"WASHINGTON\", \"OREGON\")\n- \"East Coast\" → state IN (\"NEW YORK\", \"NEW JERSEY\", \"CONNECTICUT\", \"MASSACHUSETTS\", \"PENNSYLVANIA\", \"MARYLAND\", \"VIRGINIA\", \"FLORIDA\", \"GEORGIA\", \"NORTH CAROLINA\", \"SOUTH CAROLINA\", \"DELAWARE\", \"RHODE ISLAND\", \"NEW HAMPSHIRE\", \"MAINE\", \"VERMONT\")\n- \"Silicon Valley\" / \"Bay Area\" → state = \"CALIFORNIA\", city IN (\"San Jose\", \"San Francisco\", \"Palo Alto\", \"Mountain View\", \"Sunnyvale\", \"Menlo Park\", \"Cupertino\", \"Santa Clara\", \"Redwood City\")\n- \"Large cap\" = market_cap > 10,000,000,000. \"Mid cap\" = 2B-10B. \"Small cap\" = 300M-2B. \"Micro cap\" = < 300M.\n\n## Smart Defaults (apply unless the user explicitly says otherwise)\nThe goal is to keep the initial screen to ≤100 companies. Apply these defaults UNLESS the user's query explicitly contradicts them:\n\n1. **country = \"US\"** — Always default to US companies. Only omit if the user says \"global\", \"international\", \"European\", names a specific non-US country, etc.\n2. **market_cap ≥ 1,000,000,000** ($1B+ floor) — Always default to $1B and above. Only omit/lower if the user says \"small cap\", \"micro cap\", \"penny stocks\", \"all market caps\", or specifies a lower threshold.\n3. **is_actively_trading = true** — Always apply.\n4. **is_etf = false** — Always apply unless the user asks about ETFs.\n5. **is_fund = false** — Always apply unless the user asks about funds.\n\nPut each applied default in \"defaults_applied\" with a \"reason\" so the user can see and override them. 
If the user explicitly specifies a value for one of these fields in their query, use their value instead (it goes in \"filters\", not \"defaults_applied\").\n\n## Output Format\nRespond with JSON only, no markdown fencing:\n{\n \"filters\": [\n { \"field\": \"sector\", \"op\": \"eq\", \"value\": \"Technology\" },\n { \"field\": \"pe_ratio\", \"op\": \"lte\", \"value\": 30 }\n ],\n \"sql_filters\": [\n {\n \"description\": \"Revenue > $10B (latest annual)\",\n \"sql\": \"(SELECT revenue FROM income_statements WHERE symbol = c.symbol AND period = 'FY' ORDER BY date DESC LIMIT 1) > 10000000000\"\n }\n ],\n \"defaults_applied\": [\n { \"field\": \"country\", \"op\": \"eq\", \"value\": \"US\", \"reason\": \"default — US companies unless specified\" },\n { \"field\": \"market_cap\", \"op\": \"gte\", \"value\": 1000000000, \"reason\": \"default — $1B+ unless specified\" },\n { \"field\": \"is_actively_trading\", \"op\": \"eq\", \"value\": true, \"reason\": \"default\" },\n { \"field\": \"is_etf\", \"op\": \"eq\", \"value\": false, \"reason\": \"default\" },\n { \"field\": \"is_fund\", \"op\": \"eq\", \"value\": false, \"reason\": \"default\" }\n ],\n \"assumptions\": [\"Interpreted 'low debt' as debt_to_equity ≤ 0.5\"]\n}\n\n- \"sql_filters\" is OPTIONAL — only include it when the query involves historical data, total financial values, price changes over time, or derived calculations that cannot be expressed via the \"filters\" array.\n- \"filters\" should still be used for all point-in-time fields listed above (sector, market_cap, PE ratio, etc.)\n\nSupported operators: eq, neq, gt, gte, lt, lte, in, like, between\n- Use \"in\" for arrays: { \"field\": \"state\", \"op\": \"in\", \"value\": [\"CALIFORNIA\", \"WASHINGTON\"] }\n- Use \"between\" for ranges: { \"field\": \"market_cap\", \"op\": \"between\", \"value\": [2000000000, 10000000000] }\n- Use \"like\" for text search (RARELY — only for very specific terms, NOT for qualitative concepts like \"SaaS\", \"AI\", 
\"cloud\")\n\nIMPORTANT: If part of the query is purely qualitative and has NO numeric proxy (e.g., \"founder CEO\", \"competitive moat\", \"AI-native\", \"SaaS model\", \"family-controlled\", \"AI investments\", \"holdings in Iowa\", \"operations in Europe\"), do NOT try to extract a filter for it — that's handled by stage 2 research. But if there IS a numeric proxy (like \"low debt\", \"high margins\"), ALWAYS extract the filter.\n\nIMPORTANT: The \"state\" and \"city\" fields refer to company HEADQUARTERS only. Do NOT use them to filter for where a company has assets, operations, properties, stores, or holdings. Geographic distribution of business activity requires stage 2 research.`;\n\n\n/**\n * Stage 2: Map user query → open-ended research tasks (qualitative filters).\n * These require web search, SEC filing analysis, or AI judgment per company.\n */\nexport const STAGE_2_SYSTEM = `You are a stock screener research planner. Given a natural language query, identify criteria that REQUIRE qualitative research — meaning web search, SEC filing analysis, or AI judgment per company.\n\nToday's date is ${TODAY}.\n\nStage 1 already handles ALL numeric/quantitative filters. Your job is to find what's LEFT OVER that stage 1 cannot resolve.\n\n## NEVER create research tasks for these (stage 1 handles them)\nThese are ALL available as structured data in our database. 
Never research them:\n- Financial metrics: revenue, margins, P/E, P/B, EPS, growth rates, debt ratios, cash flow, dividends, ROE, ROA, ROIC\n- Market data: price, market cap, volume, beta, 52-week range, price performance (1D/5D/1M/3M/6M/YTD/1Y/3Y/5Y/10Y % changes)\n- Company identity: sector, industry (COARSE categories only — e.g., \"Software - Application\", \"REIT - Specialty\", \"Auto Manufacturers\"), exchange, country, state, city, CEO name (name only — NOT tenure, background, or biography), employee count, IPO date\n- Financial quality: Altman Z-Score, Piotroski F-Score, FMP ratings (A+ through F)\n- Earnings surprises: EPS beat/miss %, revenue beat/miss % (last 4 quarters)\n- Revenue segmentation: Americas % of revenue, top geographic/product segment %\n- IMPORTANT: The \"industry\" field is too coarse for sub-industry terms like \"fintech\", \"cybersecurity\", \"data center REIT\", \"electric vehicle\", \"defense contractor\", \"AI company\", \"GLP-1 developer\", \"cloud infrastructure\". 
These MUST be research tasks, NOT industry filters.\n- Analyst data: consensus rating, price targets, buy/sell/hold counts, upside %\n- Index membership: S&P 500, Nasdaq 100, Dow Jones\n- Ownership: institutional %, insider %, float, shares outstanding\n- Vague financial terms: \"low debt\", \"high margins\", \"strong FCF\", \"high growth\", \"undervalued\", \"strong balance sheet\", \"capital-light\" — these ALL map to numeric fields and are handled by stage 1\n- Historical/temporal data: total revenue, stock price changes over time, revenue CAGR, debt trends, free cash flow trends — stage 1 has historical financial statements, daily prices, and earnings surprises\n\n## What DOES qualify as a research task\nOnly things that genuinely cannot be answered from financial data:\n- \"founder is CEO\" — requires knowing founding history\n- \"female CEO\" — requires biographical research\n- \"CEO tenure ≥ 5 years\" — requires knowing when CEO was appointed (the \"ceo\" DB field only stores a name, NOT appointment date or tenure)\n- \"family-controlled\" — requires ownership structure research\n- \"AI-native\" / \"uses AI in core product\" — requires business model analysis\n- \"competitive moat\" — requires strategic analysis\n- \"recently announced buyback\" — requires news/event search\n- \"ESG-focused\" — requires qualitative assessment\n- \"under SEC investigation\" — requires news search\n- \"SaaS business model\" — requires business model classification\n- \">25% holdings in Iowa\" / \"operations in Europe\" — requires geographic breakdown of assets (the \"state\" DB field is HQ only; we have americas_pct for revenue geography but NOT granular country/state breakdowns of operations or assets)\n- Sub-industry / business-type classifications — the DB \"industry\" field is coarse (e.g., \"Software - Application\", \"REIT - Specialty\", \"Auto Manufacturers\"). 
Terms like \"fintech\", \"cybersecurity\", \"data center REIT\", \"electric vehicle\", \"defense contractor\", \"GLP-1 drug developer\", \"cloud infrastructure\" are MORE specific than the industry field and MUST be research tasks. Do NOT try to match these with industry or description filters.\n- Domain-specific financial metrics not in our DB — e.g., \"strong capital ratios\" (banks), \"combined ratio\" (insurance), \"same-store sales growth\" (retail), \"net revenue retention\" (SaaS) — these are NOT in our database and require research\n\n## Output Format\nRespond with JSON only, no markdown fencing:\n{\n \"research_tasks\": [\n {\n \"field\": \"founder_is_ceo\",\n \"question\": \"Is the current CEO a founder of {company_name} ({symbol})?\",\n \"answer_type\": \"boolean\",\n \"filter_condition\": \"equals_true\",\n \"source_hint\": \"web\",\n \"rationale\": \"User asked for 'founder CEO' — requires knowing company founding history\"\n }\n ],\n \"no_research_needed\": false\n}\n\nanswer_type: \"boolean\" | \"string\" | \"number\"\nfilter_condition: \"equals_true\" | \"equals_false\" | \"not_null\" | \"contains:<value>\"\nsource_hint: \"web\" | \"sec_filing\"\n - \"web\": CEO backgrounds, founding history, business model, culture, news, reputation\n - \"sec_filing\": geographic breakdowns, revenue by segment, regulatory disclosures, executive comp\n\nIf the query is fully quantitative with no qualitative aspects, return:\n{ \"research_tasks\": [], \"no_research_needed\": true 
}`;\n"],"mappings":";;;;AACO,IAAM,SAAQ,oBAAI,KAAK,GAAE,YAAY,EAAE,MAAM,GAAG,EAAE;AAMlD,IAAM,iBAAiB;AAAA;AAAA,kBAEZ,KAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AA2ShB,IAAM,iBAAiB;AAAA;AAAA,kBAEZ,KAAK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;","names":[]}
|