@robotostudio/senku 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -0
- package/dist/digest/github-fetch.d.ts +101 -0
- package/dist/digest/github-fetch.js +305 -0
- package/dist/digest/index.d.ts +3 -0
- package/dist/digest/index.js +199 -0
- package/dist/digest/transform.d.ts +89 -0
- package/dist/digest/transform.js +108 -0
- package/dist/digest/types.d.ts +135 -0
- package/dist/digest/types.js +5 -0
- package/dist/lib.d.ts +2 -0
- package/dist/lib.js +1 -0
- package/dist/output/summary.d.ts +5 -0
- package/dist/output/summary.js +26 -0
- package/dist/pipeline.js +1 -28
- package/dist/utils.d.ts +5 -0
- package/dist/utils.js +16 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -104,6 +104,58 @@ interface RunOutput {
|
|
|
104
104
|
|
|
105
105
|
`analyze()` is the one-shot path. To compose the pipeline yourself, the pieces are exported too: `cloneRepo`, `scrapeRepo`, `getRepoName`, `getAuthors`, `checkRepoHealth`, `assertGitAvailable`, `discoverRepos`, `runPipeline`, `aggregateByTicket`, `estimateTicketHours`, `createLinearClient`, `fetchTickets`, `extractFromPR` — plus all the types.
|
|
106
106
|
|
|
107
|
+
## dailyDigest() — daily per-user GitHub API ingest
|
|
108
|
+
|
|
109
|
+
For automated, cron-scheduled work attribution (Trigger.dev, Vercel Cron, scheduled lambdas), use `dailyDigest()` instead of `analyze()`. It reads the GitHub API directly — no local clone required — and is designed for serverless environments.
|
|
110
|
+
|
|
111
|
+
```ts
|
|
112
|
+
import { dailyDigest } from "@robotostudio/senku";
|
|
113
|
+
|
|
114
|
+
const result = await dailyDigest({
|
|
115
|
+
users: [
|
|
116
|
+
{
|
|
117
|
+
login: "alice",
|
|
118
|
+
githubToken: process.env.GITHUB_TOKEN_ALICE,
|
|
119
|
+
linearApiKey: process.env.LINEAR_API_KEY_WORKSPACE_A,
|
|
120
|
+
repos: [{ slug: "org-a/repo-1" }, { slug: "org-a/repo-2" }],
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
login: "bob",
|
|
124
|
+
githubToken: process.env.GITHUB_TOKEN_BOB,
|
|
125
|
+
linearApiKey: process.env.LINEAR_API_KEY_WORKSPACE_B,
|
|
126
|
+
repos: [{ slug: "org-b/repo-1" }],
|
|
127
|
+
},
|
|
128
|
+
],
|
|
129
|
+
openaiApiKey: process.env.OPENAI_API_KEY,
|
|
130
|
+
since: "2026-05-21T00:00:00Z",
|
|
131
|
+
until: "2026-05-28T23:59:59Z",
|
|
132
|
+
org: "daily-digest-example",
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
// Each entry in result.users is that user's rollup: login, totalHours, repos, userSummary, stats
|
|
136
|
+
for (const user of result.users) {
|
|
137
|
+
console.log(`${user.login}: ${user.totalHours}h`);
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Key differences from `analyze()`:**
|
|
142
|
+
|
|
143
|
+
| Aspect | `analyze()` | `dailyDigest()` |
|
|
144
|
+
|--------|-----------|-----------------|
|
|
145
|
+
| Data source | Local git clone (`git log --all`) | GitHub API (REST + GraphQL) |
|
|
146
|
+
| Token model | One `githubToken` → all repos | Per-user `githubToken` (org-scoped) |
|
|
147
|
+
| Attribution window | Commit window (from/to dates on commits) | Activity window — when work landed on GitHub (PR updated / commit pushed), not when it was originally written |
|
|
148
|
+
| Use case | Historical analysis, deep dives | Daily crons, per-user aggregation, serverless tasks |
|
|
149
|
+
| Rate limit | None (local) | 5,000 API calls/hour per GitHub token |
|
|
150
|
+
|
|
151
|
+
**When to use which:**
|
|
152
|
+
|
|
153
|
+
- **Use `analyze()`** — One-off historical windows, all repos in one org, you control the GitHub token, want exact commit history.
|
|
154
|
+
- **Use `dailyDigest()`** — Daily crons, different GitHub orgs need different tokens, running in serverless (Trigger.dev), working with open PRs that haven't been merged yet.
|
|
155
|
+
- **Use both** — Combine in a morning digest: yesterday's `dailyDigest()` for quick per-user rollup, weekly `analyze()` for ticket-by-ticket detail.
|
|
156
|
+
|
|
157
|
+
See [`docs/digest-context.md`](./docs/digest-context.md) for domain terminology (attribution, work unit, user definitions).
|
|
158
|
+
|
|
107
159
|
## CLI
|
|
108
160
|
|
|
109
161
|
From a checkout:
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fail-safe GitHub API fetch layer for the digest pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Properties:
|
|
5
|
+
* - **Preflight rate-limit check** refuses to start a user's run if the budget
|
|
6
|
+
* is exhausted (default threshold: 100 calls remaining).
|
|
7
|
+
* - **FetchResult<T> discriminated union**: every response maps to a typed
|
|
8
|
+
* failure reason so callers can branch without null-checking.
|
|
9
|
+
* - **5xx + network errors**: up to 3 attempts (the initial request plus 2
|
|
10
|
+
* retries) with exponential backoff between attempts (500ms, then 1s).
|
|
11
|
+
* - **Timeout**: 15s per request via `AbortSignal.timeout`.
|
|
12
|
+
* - **GraphQL error detection**: 200 responses with a non-empty `errors[]`
|
|
13
|
+
* array return `{ ok: false, reason: "graphql-error" }`.
|
|
14
|
+
* - **Telemetry**: every HTTP request increments `ctx.httpRequests`; each
|
|
15
|
+
* logical API call increments `ctx.apiCalls`. Retries appear in
|
|
16
|
+
* `httpRequests` but not in `apiCalls`. Errors push to `ctx.errors`.
|
|
17
|
+
*/
|
|
18
|
+
import type pino from "pino";
|
|
19
|
+
import { type GraphQLPullRequest, type RestCommit } from "./transform.js";
|
|
20
|
+
import type { DigestPRData } from "./types.js";
|
|
21
|
+
/** Machine-readable category attached to every failed fetch. */
export type FetchFailReason =
    | "unauthorized"
    | "forbidden"
    | "not-found"
    | "rate-limited"
    | "timeout"
    | "graphql-error"
    | "network"
    | "unknown";
/**
 * Outcome of a fetch, discriminated on `ok`.
 *
 * - `ok: true` carries the parsed response in `data`.
 * - `ok: false` carries a {@link FetchFailReason}, a human-readable `message`,
 *   and the HTTP `status` when a response was actually received.
 */
export type FetchResult<T> =
    | { ok: true; data: T }
    | { ok: false; reason: FetchFailReason; status?: number; message: string };
|
|
31
|
+
/** Mutable telemetry bag threaded through every fetch helper for one run. */
export interface CallContext {
    /**
     * Number of logical API operations. An operation that needed several
     * retries still counts once. Mirrors the pre-refactor `apiCalls`.
     */
    apiCalls: number;
    /**
     * Number of HTTP requests actually sent, retries included, so it is
     * always >= `apiCalls`. Handy for surfacing retry overhead.
     */
    httpRequests: number;
    /** Human-readable messages for every failure recorded during the run. */
    errors: string[];
}
|
|
44
|
+
/** Creates a fresh {@link CallContext}: both counters at zero and an empty `errors` list. */
export declare const makeCallContext: () => CallContext;
|
|
45
|
+
/**
 * Sends one authenticated request with retry and timeout handling. Failures
 * are reported through the returned {@link FetchResult} (with a
 * {@link FetchFailReason}) rather than thrown.
 *
 * Counting contract: the **caller** bumps `ctx.apiCalls` once per logical
 * operation; this function bumps `ctx.httpRequests` for every HTTP attempt it
 * makes, retries included.
 *
 * @param url - Absolute URL to request.
 * @param token - GitHub token sent as a Bearer credential.
 * @param ctx - Telemetry context updated in place.
 * @param init - Optional extra fetch options (method, body, headers).
 * @param attempt - Zero-based attempt index to start from; defaults to 0.
 */
export declare function safeFetch<T>(
    url: string,
    token: string,
    ctx: CallContext,
    init?: RequestInit,
    attempt?: number,
): Promise<FetchResult<T>>;
|
|
54
|
+
/**
 * Confirms the token works and still has enough rate-limit budget for a run.
 *
 * @param token - GitHub token to check.
 * @param ctx - Telemetry context; a failed request is recorded in `ctx.errors`.
 * @returns The remaining call budget, or `null` when the request failed or the
 *   budget is below the minimum threshold.
 */
export declare function preflight(
    token: string,
    ctx: CallContext,
): Promise<number | null>;
|
|
56
|
+
/**
 * GraphQL search for pull requests in `repo` authored by `login` that were
 * updated between `since` and `until`. Resolves to full PR nodes, inner
 * commits and stats included. Pagination stops when `hasNextPage` is false or
 * the {@link MAX_SEARCH_PAGES} hard cap is reached.
 *
 * The search string is assembled outside the GraphQL document and passed as
 * the `$searchQuery: String!` variable, so user-supplied values are only ever
 * interpolated into the search expression, never into GraphQL syntax.
 *
 * A failed page is recorded in `ctx.errors` and the PRs gathered so far are
 * returned.
 */
export declare function fetchPRsInWindow(
    token: string,
    login: string,
    repo: string,
    since: string,
    until: string,
    ctx: CallContext,
): Promise<GraphQLPullRequest[]>;
|
|
66
|
+
/**
 * REST listing of commits authored by `login` on the default branch inside
 * the window. Pages with `page=N` until a partial page comes back or the
 * {@link MAX_DIRECT_COMMIT_PAGES} cap is reached (1000-commit ceiling).
 *
 * Intended for direct-to-main commits that have no PR: the caller removes
 * merge commits and squash-merge commits using the PR mergeCommit SHAs.
 */
export declare function fetchDirectCommits(
    token: string,
    login: string,
    repo: string,
    since: string,
    until: string,
    ctx: CallContext,
): Promise<RestCommit[]>;
|
|
74
|
+
/**
 * Reads additions, deletions and file count for one commit from the REST
 * single-commit endpoint. Resolves to `null` on any failure; callers treat
 * `null` as "no stats available".
 *
 * Each invocation counts as one logical `apiCalls` unit.
 */
export declare function fetchCommitStats(
    token: string,
    repo: string,
    sha: string,
    ctx: CallContext,
): Promise<{
    additions: number;
    deletions: number;
    filesChanged: number;
} | null>;
|
|
85
|
+
/** Optional switches for {@link fetchUserRepo}. */
export interface FetchUserRepoOptions {
    /**
     * Set to true to issue one REST `GET /repos/:repo/commits/:sha` per direct
     * commit so that `additions`, `deletions` and `filesChanged` are filled in.
     * Off by default because every commit costs an extra API call.
     */
    populateDirectCommitStats?: boolean;
}
|
|
93
|
+
/**
 * High-level fetch for a single (user, repo) pair. Combines the PR search and
 * the direct-commit listing into digest-shaped {@link DigestPRData[]}, after
 * applying the closed-unmerged filter and de-duplicating direct commits
 * against PR merge commits.
 *
 * @param user - GitHub login plus the token used for every request.
 * @param repo - Repository slug in `owner/repo` form.
 * @param since - Start of the activity window.
 * @param until - End of the activity window.
 * @param ctx - Telemetry context updated in place.
 * @param logger - Logger used for a debug summary of what was fetched.
 * @param options - See {@link FetchUserRepoOptions}.
 */
export declare function fetchUserRepo(
    user: {
        login: string;
        githubToken: string;
    },
    repo: string,
    since: string,
    until: string,
    ctx: CallContext,
    logger: pino.Logger,
    options?: FetchUserRepoOptions,
): Promise<DigestPRData[]>;
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
import { batchConcurrent } from "../utils.js";
|
|
2
|
+
import { collectMergeShas, directCommitToDigestPRData, isDirectCommit, prNodeToDigestPRData, } from "./transform.js";
|
|
3
|
+
/** Base URL of the GitHub REST API. */
const GH_REST = "https://api.github.com";
/** GitHub GraphQL endpoint. */
const GH_GRAPHQL = `${GH_REST}/graphql`;
/** Preflight refuses to start when fewer calls than this remain. */
const PREFLIGHT_MIN_BUDGET = 100;
/** Upper bound on HTTP attempts made by `safeFetch` for one request. */
const MAX_RETRIES = 3;
/** Per-request timeout in milliseconds (15 seconds). */
const TIMEOUT_MS = 15 * 1000;
/** GitHub search hard cap — prevents runaway pagination loops. */
const MAX_SEARCH_PAGES = 20;
/** Number of search results requested per GraphQL page. */
const PAGE_SIZE = 50;
|
|
11
|
+
/**
 * Creates a fresh telemetry context: both counters start at zero and the
 * error list starts empty. Every call returns a new object and a new array.
 */
export const makeCallContext = () => {
    const context = { apiCalls: 0, httpRequests: 0, errors: [] };
    return context;
};
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Internal helpers
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
/** Resolves after `ms` milliseconds; used for retry backoff. */
const sleep = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
});
|
|
20
|
+
/**
 * Builds the request headers sent with every GitHub call.
 *
 * @param token - GitHub token, sent as a Bearer credential.
 * @param extra - Additional headers; entries here override the defaults.
 * @returns A new plain object holding the merged headers.
 */
const buildHeaders = (token, extra = {}) => {
    const defaults = {
        Authorization: `Bearer ${token}`,
        Accept: "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "User-Agent": "senku-daily-digest",
    };
    return Object.assign({}, defaults, extra);
};
|
|
27
|
+
/** HTTP status codes that map to a specific failure reason. */
const STATUS_REASON = Object.freeze({
    401: "unauthorized",
    403: "forbidden",
    404: "not-found",
    429: "rate-limited",
});
/**
 * Translates an HTTP status code into a failure reason.
 *
 * @param status - HTTP status code of the response.
 * @returns The mapped reason, or "unknown" for any status not in the table.
 */
function statusToReason(status) {
    const mapped = STATUS_REASON[status];
    return mapped ?? "unknown";
}
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Core fetch primitive
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
/**
 * Extracts a readable message from a GraphQL `errors` array, if present.
 *
 * Tolerates any JSON value: a `null` or primitive body, a missing or
 * non-array `errors` field, and a `null` first entry are all handled without
 * throwing. Throwing here would be caught by the fetch layer and misreported
 * as a retryable network error.
 *
 * @param body - Parsed JSON response body of any shape.
 * @returns The first error's `message` when it is a string, "GraphQL error"
 *   when errors exist but the first has no string message, or `null` when the
 *   body carries no errors.
 */
function graphqlErrorMessage(body) {
    const errors = body?.errors;
    if (!Array.isArray(errors) || errors.length === 0) {
        return null;
    }
    const first = errors[0];
    return typeof first?.message === "string" ? first.message : "GraphQL error";
}
|
|
47
|
+
/**
 * Performs exactly one HTTP attempt and classifies the outcome.
 *
 * @param url - Absolute URL to request.
 * @param token - GitHub token used to build the Authorization header.
 * @param ctx - Telemetry context; `httpRequests` is incremented on every call.
 * @param init - Extra fetch options; `init.headers` is merged over the defaults.
 * @param attempt - Zero-based index of this attempt.
 * @returns A FetchResult when the outcome is final, or `null` when the caller
 *   should back off and retry (5xx or network error with attempts left).
 */
async function attemptFetch(url, token, ctx, init, attempt) {
    ctx.httpRequests++;
    // True when no further attempt is allowed after this one.
    const isLastAttempt = attempt + 1 >= MAX_RETRIES;
    try {
        const res = await fetch(url, {
            ...init,
            headers: buildHeaders(token, init.headers),
            signal: AbortSignal.timeout(TIMEOUT_MS),
        });
        if (res.ok) {
            const body = (await res.json());
            // GraphQL reports failures inside a 200 response.
            const gqlErr = graphqlErrorMessage(body);
            if (gqlErr) {
                return { ok: false, reason: "graphql-error", status: 200, message: gqlErr };
            }
            return { ok: true, data: body };
        }
        if (res.status < 500) {
            // GitHub signals an exhausted primary rate limit with 403 (or 429)
            // and `x-ratelimit-remaining: 0`; report that as "rate-limited"
            // instead of the generic "forbidden".
            const budgetExhausted = res.status === 403 && res.headers.get("x-ratelimit-remaining") === "0";
            return {
                ok: false,
                reason: budgetExhausted ? "rate-limited" : statusToReason(res.status),
                status: res.status,
                message: `${res.status} ${res.statusText} — ${url}`,
            };
        }
        if (isLastAttempt) {
            // 5xx with no attempts left: fail now, keeping the status, rather
            // than asking the caller to sleep before giving up anyway.
            return {
                ok: false,
                reason: statusToReason(res.status),
                status: res.status,
                message: `Max retries (${MAX_RETRIES}) exceeded: ${res.status} ${res.statusText} — ${url}`,
            };
        }
        // 5xx — signal retry
        return null;
    }
    catch (err) {
        // Timeouts are final: they are not retried.
        if (err instanceof DOMException && err.name === "TimeoutError") {
            return { ok: false, reason: "timeout", message: `Timeout after ${TIMEOUT_MS}ms: ${url}` };
        }
        if (isLastAttempt) {
            const msg = err instanceof Error ? err.message : String(err);
            return { ok: false, reason: "network", message: `Network error: ${msg} — ${url}` };
        }
        // Network error — signal retry
        return null;
    }
}
|
|
87
|
+
/**
 * Typed fetch with retry and timeout. Resolves to a {@link FetchResult};
 * failures are reported through the result and mapped to a
 * {@link FetchFailReason}.
 *
 * The **caller** increments `ctx.apiCalls` once per logical operation.
 * `ctx.httpRequests` is incremented for every HTTP attempt made from here,
 * retries included.
 *
 * @param url - Absolute URL to request.
 * @param token - GitHub token.
 * @param ctx - Telemetry context updated in place.
 * @param init - Extra fetch options; defaults to an empty object.
 * @param attempt - Zero-based attempt index to start from; defaults to 0.
 */
export async function safeFetch(url, token, ctx, init = {}, attempt = 0) {
    for (let current = attempt; current < MAX_RETRIES; current += 1) {
        const outcome = await attemptFetch(url, token, ctx, init, current);
        if (outcome !== null) {
            return outcome;
        }
        // Exponential backoff: 500ms, 1s, 2s, ...
        await sleep(2 ** current * 500);
    }
    return {
        ok: false,
        reason: "unknown",
        message: `Max retries (${MAX_RETRIES}) exceeded: ${url}`,
    };
}
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
// Preflight
|
|
112
|
+
// ---------------------------------------------------------------------------
|
|
113
|
+
/**
 * Confirms the token is valid and has enough rate-limit budget to run.
 *
 * Counts as one logical API call. Only the `resources.core` budget reported
 * by `/rate_limit` is consulted.
 *
 * @param token - GitHub token to check.
 * @param ctx - Telemetry context; request and payload failures are recorded
 *   in `ctx.errors`.
 * @returns The remaining core budget when it is at least
 *   `PREFLIGHT_MIN_BUDGET`, otherwise `null` (request failed, payload was
 *   malformed, or budget too low).
 */
export async function preflight(token, ctx) {
    ctx.apiCalls++;
    const result = await safeFetch(`${GH_REST}/rate_limit`, token, ctx);
    if (!result.ok) {
        ctx.errors.push(`preflight: ${result.message}`);
        return null;
    }
    // Read defensively: an unexpected payload must not throw out of a
    // function whose contract is to report failure as `null`.
    const remaining = result.data?.resources?.core?.remaining;
    if (typeof remaining !== "number") {
        ctx.errors.push("preflight: unexpected /rate_limit response shape");
        return null;
    }
    return remaining >= PREFLIGHT_MIN_BUDGET ? remaining : null;
}
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
// Input validation
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
/** Characters accepted in a GitHub login. */
const LOGIN_RE = /^[A-Za-z0-9_-]+$/;
/** `owner/repo` slug: exactly two segments of word characters, dots or hyphens. */
const REPO_RE = /^[\w.-]+\/[\w.-]+$/;
/**
 * Validates the values that get interpolated into search strings and URL
 * paths.
 *
 * Non-string values are rejected explicitly because `RegExp.test` coerces its
 * argument (so `undefined` would otherwise pass as "undefined"). Slug segments
 * equal to `.` or `..` are rejected because URL normalisation would collapse
 * them and silently target a different endpoint.
 *
 * @param login - GitHub login to check.
 * @param repo - Repository slug to check.
 * @throws {Error} When either value is invalid.
 */
function validateFetchInputs(login, repo) {
    if (typeof login !== "string" || !LOGIN_RE.test(login)) {
        throw new Error(`Invalid GitHub login: "${login}" — only [A-Za-z0-9_-] allowed`);
    }
    const repoOk = typeof repo === "string" &&
        REPO_RE.test(repo) &&
        !repo.split("/").some((segment) => segment === "." || segment === "..");
    if (!repoOk) {
        throw new Error(`Invalid repo slug: "${repo}" — expected owner/repo format`);
    }
}
|
|
137
|
+
/**
 * Field selection requested for every pull request node: PR metadata, the
 * merge commit oid, and the first 100 commits with their per-commit stats and
 * author identity.
 */
const GQL_PR_FIELDS = `
number title state createdAt mergedAt updatedAt headRefName url
author { login }
mergeCommit { oid }
commits(first: 100) {
nodes {
commit {
oid authoredDate committedDate message additions deletions changedFilesIfAvailable
author { user { login } email }
}
}
}
`;
/**
 * GraphQL document for the PR search. The search expression arrives through
 * the `$searchQuery` variable and the page cursor through `$after`; only the
 * page size and the shared field selection are baked into the document.
 */
const GQL_SEARCH_QUERY = `
query SearchPRs($searchQuery: String!, $after: String) {
search(query: $searchQuery, type: ISSUE, first: ${PAGE_SIZE}, after: $after) {
nodes {
... on PullRequest {
${GQL_PR_FIELDS}
}
}
pageInfo { endCursor hasNextPage }
}
}
`;
|
|
162
|
+
/**
 * GraphQL search for pull requests in `repo` authored by `login` and updated
 * between `since` and `until`. Resolves to full PR nodes including inner
 * commits and stats. Paginates until `hasNextPage = false` or the
 * {@link MAX_SEARCH_PAGES} hard cap is hit.
 *
 * The search expression is plain GitHub search syntax handed to GraphQL as
 * the `$searchQuery: String!` variable, so nothing is interpolated into the
 * GraphQL document itself.
 *
 * @throws {Error} Synchronously, when `login` or `repo` fails validation.
 */
export function fetchPRsInWindow(token, login, repo, since, until, ctx) {
    validateFetchInputs(login, repo);
    // Search qualifiers, joined with single spaces.
    const qualifiers = [
        `repo:${repo}`,
        `author:${login}`,
        `updated:${since}..${until}`,
        "type:pr",
    ];
    const firstCursor = null;
    return fetchPRsPage(token, qualifiers.join(" "), ctx, firstCursor, 0, []);
}
|
|
178
|
+
/**
 * Fetches search pages starting at `cursor` and keeps going until the search
 * is exhausted, a page fails, or the page cap is reached.
 *
 * @param token - GitHub token.
 * @param searchQuery - GitHub search expression passed as a GraphQL variable.
 * @param ctx - Telemetry context; `apiCalls` grows by one per page requested.
 * @param cursor - Cursor to start after, or `null` for the first page.
 * @param page - Zero-based index of the page about to be fetched.
 * @param accumulated - PR nodes collected before this call.
 * @returns All PR nodes collected; on a failed page, those gathered so far.
 */
async function fetchPRsPage(token, searchQuery, ctx, cursor, page, accumulated) {
    let after = cursor;
    let pageIndex = page;
    let collected = accumulated;
    for (;;) {
        ctx.apiCalls += 1;
        const payload = { query: GQL_SEARCH_QUERY, variables: { searchQuery, after } };
        const response = await safeFetch(GH_GRAPHQL, token, ctx, {
            method: "POST",
            body: JSON.stringify(payload),
        });
        if (!response.ok) {
            ctx.errors.push(`fetchPRsInWindow page ${pageIndex + 1}: ${response.message}`);
            return collected;
        }
        const search = response.data.data.search;
        // Keep only nodes that carry a PR number.
        const pullRequests = search.nodes.filter((node) => Boolean(node?.number));
        collected = [...collected, ...pullRequests];
        pageIndex += 1;
        if (!search.pageInfo.hasNextPage) {
            return collected;
        }
        if (pageIndex >= MAX_SEARCH_PAGES) {
            ctx.errors.push(`search-truncated: ${collected.length} PRs`);
            return collected;
        }
        after = search.pageInfo.endCursor;
    }
}
|
|
202
|
+
// ---------------------------------------------------------------------------
|
|
203
|
+
// REST direct-commit fetch (#5)
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
/** Hard cap for REST commit pagination — 10 × 100 = 1000 commits per (user, repo). */
const MAX_DIRECT_COMMIT_PAGES = 10;
/**
 * Fetches commit-list pages starting at `page` until a partial page arrives,
 * a page fails, or the page cap is exceeded.
 *
 * @param token - GitHub token.
 * @param baseUrl - Commit-list URL with every query parameter except `page`.
 * @param ctx - Telemetry context; failures and truncation land in `ctx.errors`.
 * @param page - One-based number of the page about to be fetched.
 * @param accumulated - Commits collected before this call.
 * @returns All commits collected; on failure or truncation, those gathered so far.
 */
async function fetchDirectCommitsPage(token, baseUrl, ctx, page, accumulated) {
    let commits = accumulated;
    for (let current = page; current <= MAX_DIRECT_COMMIT_PAGES; current += 1) {
        const pageUrl = new URL(baseUrl);
        pageUrl.searchParams.set("page", String(current));
        const response = await safeFetch(pageUrl.toString(), token, ctx);
        if (!response.ok) {
            ctx.errors.push(`fetchDirectCommits: ${response.message}`);
            return commits;
        }
        commits = [...commits, ...response.data];
        // Fewer than 100 entries means this was the last page.
        if (response.data.length < 100) {
            return commits;
        }
    }
    ctx.errors.push(`fetchDirectCommits: truncated at ${MAX_DIRECT_COMMIT_PAGES * 100} commits`);
    return commits;
}
|
|
225
|
+
/**
 * REST listing of commits authored by `login` on the default branch within
 * the window. Paginates via `page=N` until a partial page is returned or the
 * {@link MAX_DIRECT_COMMIT_PAGES} cap is hit (1000-commit ceiling).
 *
 * Used for direct-to-main commits with no PR; the caller filters out merge
 * commits and squash-merge commits using PR mergeCommit SHAs.
 *
 * The whole paginated listing counts as a single logical `apiCalls` unit.
 *
 * @throws {Error} Synchronously, when `login` or `repo` fails validation.
 */
export function fetchDirectCommits(token, login, repo, since, until, ctx) {
    validateFetchInputs(login, repo);
    const baseUrl = new URL(`${GH_REST}/repos/${repo}/commits`);
    const query = { author: login, since, until, per_page: "100" };
    for (const [key, value] of Object.entries(query)) {
        baseUrl.searchParams.set(key, value);
    }
    ctx.apiCalls += 1;
    const firstPage = 1;
    return fetchDirectCommitsPage(token, baseUrl, ctx, firstPage, []);
}
|
|
242
|
+
/**
 * Reads addition, deletion and file counts for one commit from the REST
 * single-commit endpoint. Resolves to `null` on any failure (the failure is
 * also recorded in `ctx.errors`); callers treat `null` as "no stats".
 *
 * Each call is one logical `apiCalls` unit. Counts missing from the response
 * default to 0.
 */
export async function fetchCommitStats(token, repo, sha, ctx) {
    const endpoint = new URL(`${GH_REST}/repos/${repo}/commits/${sha}`).toString();
    ctx.apiCalls += 1;
    const response = await safeFetch(endpoint, token, ctx);
    if (!response.ok) {
        ctx.errors.push(`fetchCommitStats ${sha}: ${response.message}`);
        return null;
    }
    const { stats, files } = response.data;
    return {
        additions: stats?.additions ?? 0,
        deletions: stats?.deletions ?? 0,
        filesChanged: files?.length ?? 0,
    };
}
|
|
262
|
+
/**
 * High-level fetch for one (user, repo) pair. Runs the PR search and the
 * direct-commit listing concurrently, converts both into digest-shaped
 * {@link DigestPRData[]}, and drops direct commits that duplicate PR merge
 * commits.
 *
 * When `options.populateDirectCommitStats` is truthy, every remaining direct
 * commit is enriched with stats from the single-commit endpoint, four at a
 * time. A commit whose stats cannot be fetched is returned unchanged.
 */
export async function fetchUserRepo(user, repo, since, until, ctx, logger, options = {}) {
    const { githubToken, login } = user;
    const [prs, restCommits] = await Promise.all([
        fetchPRsInWindow(githubToken, login, repo, since, until, ctx),
        fetchDirectCommits(githubToken, login, repo, since, until, ctx),
    ]);
    logger.debug({ repo, user: login, prs: prs.length, restCommits: restCommits.length }, "Fetched user/repo");
    const prMergeShas = collectMergeShas(prs);
    // PRs for which the transform returns null are dropped.
    const fromPrs = [];
    for (const pr of prs) {
        const converted = prNodeToDigestPRData(repo, pr, since, until);
        if (converted !== null) {
            fromPrs.push(converted);
        }
    }
    const directCommits = restCommits.filter((commit) => isDirectCommit(commit, prMergeShas));
    const fromDirect = directCommits.map((commit) => directCommitToDigestPRData(repo, commit));
    const wantsStats = Boolean(options.populateDirectCommitStats);
    if (!wantsStats || fromDirect.length === 0) {
        return [...fromPrs, ...fromDirect];
    }
    // Enrich direct commits with per-commit stats (opt-in, concurrency=4).
    const jobs = fromDirect.map((pd, index) => ({ pd, sha: directCommits[index]?.sha ?? "" }));
    const enriched = await batchConcurrent(jobs, 4, async ({ pd, sha }) => {
        if (!sha) {
            return pd;
        }
        const stats = await fetchCommitStats(githubToken, repo, sha, ctx);
        if (!stats) {
            return pd;
        }
        const { additions, deletions, filesChanged } = stats;
        return {
            ...pd,
            additions,
            deletions,
            filesChanged,
            commits: pd.commits.map((commit) => ({ ...commit, additions, deletions, filesChanged })),
        };
    });
    return [...fromPrs, ...enriched];
}
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `dailyDigest()` — per-user-token GitHub digest, sibling to {@link analyze}.
|
|
3
|
+
*
|
|
4
|
+
* Reads commits + PRs via the GitHub API (REST + GraphQL), groups by Linear
|
|
5
|
+
* ticket, and estimates hours via OpenAI. Output shape matches {@link analyze}
|
|
6
|
+
* so consumers can use the same downstream code paths.
|
|
7
|
+
*
|
|
8
|
+
* Why this exists alongside `analyze()`:
|
|
9
|
+
* - No local git clone required (works in serverless / Trigger.dev tasks)
|
|
10
|
+
* - Per-user token isolation (different orgs → different GitHub + Linear keys)
|
|
11
|
+
* - Catches open PRs that local-git can't see
|
|
12
|
+
*
|
|
13
|
+
* Tradeoffs vs `analyze()`:
|
|
14
|
+
* - Bound to default branch + PR'd branches (can't read every ref like `--all`)
|
|
15
|
+
* - Rate-limited by GitHub (5,000/hr per token); use local `analyze()` for
|
|
16
|
+
* long historical windows.
|
|
17
|
+
* - Activity-date attribution — see [[CONTEXT.md#attribution]].
|
|
18
|
+
*/
|
|
19
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
20
|
+
import pino from "pino";
|
|
21
|
+
import { z } from "zod/v4";
|
|
22
|
+
import { aggregateByTicket } from "../aggregator.js";
|
|
23
|
+
import { estimateTicketHours } from "../ai/summarizer.js";
|
|
24
|
+
import { createLinearClient } from "../linear/client.js";
|
|
25
|
+
import { fetchTickets } from "../linear/tickets.js";
|
|
26
|
+
import { buildRepoResults, buildUserSummary } from "../output/summary.js";
|
|
27
|
+
import { extractFromPR } from "../parser/ticket-extractor.js";
|
|
28
|
+
import { UNLINKED_ID } from "../types.js";
|
|
29
|
+
import { batchConcurrent, partition, tryCatch } from "../utils.js";
|
|
30
|
+
import { fetchUserRepo, makeCallContext, preflight } from "./github-fetch.js";
|
|
31
|
+
/** OpenAI model used when the caller does not pass `model`. */
const DEFAULT_MODEL = "gpt-4o";
/** Organisation label used when the caller does not pass `org`. */
const DEFAULT_ORG = "daily-digest";
/** Repositories fetched in parallel per user unless overridden. */
const DEFAULT_REPO_CONCURRENCY = 4;
/** Users processed in parallel unless overridden. */
const DEFAULT_USER_CONCURRENCY = 5;
|
|
35
|
+
/** `owner/repo` slug: two segments of word characters, dots or hyphens. */
const REPO_SLUG_PATTERN = /^[\w.-]+\/[\w.-]+$/;
/** Schema for a required, non-empty string. */
const nonEmptyString = () => z.string().min(1);
/** Schema for a strictly positive integer. */
const positiveInt = () => z.number().int().positive();
/** One repository to scan, identified by its slug. */
const repoSchema = z.object({
    slug: z.string().regex(REPO_SLUG_PATTERN, "must be owner/repo format"),
});
/** One user: GitHub login, per-user credentials, and at least one repo. */
const userSchema = z.object({
    login: nonEmptyString(),
    githubToken: nonEmptyString(),
    linearApiKey: nonEmptyString(),
    repos: z.array(repoSchema).min(1),
});
/** Full `dailyDigest()` input: at least one user plus an ISO datetime window. */
const paramsSchema = z.object({
    users: z.array(userSchema).min(1),
    openaiApiKey: nonEmptyString().optional(),
    since: z.iso.datetime(),
    until: z.iso.datetime(),
    model: nonEmptyString().optional(),
    org: nonEmptyString().optional(),
    skipEstimation: z.boolean().optional(),
    repoConcurrency: positiveInt().optional(),
    userConcurrency: positiveInt().optional(),
    populateDirectCommitStats: z.boolean().optional(),
});
|
|
56
|
+
/**
 * Builds a per-user digest of GitHub activity inside a time window.
 *
 * @param params - `users`, `since` and `until` are required. `openaiApiKey`
 *   is required unless `skipEstimation` is true. Optional: `model`, `org`,
 *   `repoConcurrency`, `userConcurrency`, `populateDirectCommitStats`, and a
 *   `logger` (a silent pino logger is used when omitted).
 * @returns `{ org, generatedAt, dateRange, users }`, where `users` holds one
 *   result per input user.
 * @throws When `params` fails schema validation, or when estimation is
 *   enabled and no `openaiApiKey` was supplied.
 */
export async function dailyDigest(params) {
    const validated = paramsSchema.parse({
        users: params.users,
        openaiApiKey: params.openaiApiKey,
        since: params.since,
        until: params.until,
        model: params.model,
        org: params.org,
        skipEstimation: params.skipEstimation,
        repoConcurrency: params.repoConcurrency,
        userConcurrency: params.userConcurrency,
        populateDirectCommitStats: params.populateDirectCommitStats,
    });
    if (!(validated.skipEstimation || validated.openaiApiKey)) {
        throw new Error("openaiApiKey is required when skipEstimation is not true");
    }
    const logger = params.logger ?? pino({ level: "silent" });
    const model = validated.model ?? DEFAULT_MODEL;
    const org = validated.org ?? DEFAULT_ORG;
    const userConcurrency = validated.userConcurrency ?? DEFAULT_USER_CONCURRENCY;
    // Dedup Linear clients by API key — avoids N viewer auth round-trips for
    // multiple users sharing the same Linear workspace.
    const linearClientCache = new Map();
    for (const user of validated.users) {
        if (!linearClientCache.has(user.linearApiKey)) {
            const client = Promise.resolve(createLinearClient(user.linearApiKey, logger));
            // Clients are created eagerly, but a user whose preflight fails or
            // who has no ticket ids never awaits theirs. Mark the promise as
            // handled so a bad Linear key cannot surface as an unhandled
            // rejection; resolveTickets still sees the original rejection.
            client.catch(() => undefined);
            linearClientCache.set(user.linearApiKey, client);
        }
    }
    const openaiKey = validated.openaiApiKey ?? "";
    const languageModel = validated.skipEstimation
        ? null
        : createOpenAI({ apiKey: openaiKey })(model);
    const generatedAt = new Date().toISOString();
    // Date-only view of the window (first 10 characters of each ISO datetime).
    const dateRange = {
        from: validated.since.slice(0, 10),
        to: validated.until.slice(0, 10),
    };
    const users = await batchConcurrent(validated.users, userConcurrency, (user) => digestOneUser({
        user,
        since: validated.since,
        until: validated.until,
        org,
        languageModel,
        logger: logger.child({ user: user.login }),
        repoConcurrency: validated.repoConcurrency ?? DEFAULT_REPO_CONCURRENCY,
        skipEstimation: validated.skipEstimation ?? false,
        // Forwarded so the validated option actually reaches fetchUserRepo.
        populateDirectCommitStats: validated.populateDirectCommitStats ?? false,
        linearClientCache,
    }));
    return { org, generatedAt, dateRange, users };
}
/**
 * Produces the digest for a single user: preflight, fetch every repo, resolve
 * Linear tickets, aggregate, estimate, and summarise.
 *
 * @param ctx - Per-user settings assembled by `dailyDigest`.
 * @returns The user's result. It is empty (zero hours, no repos) when
 *   preflight fails or nothing was fetched.
 */
async function digestOneUser(ctx) {
    const t0 = Date.now();
    const callCtx = makeCallContext();
    const remaining = await preflight(ctx.user.githubToken, callCtx);
    if (!remaining) {
        callCtx.errors.push("preflight failed — token invalid or rate-limit exhausted");
        return emptyResult(ctx, callCtx, t0, 0);
    }
    const fetchOptions = { populateDirectCommitStats: ctx.populateDirectCommitStats ?? false };
    const repoPRs = await batchConcurrent(ctx.user.repos, ctx.repoConcurrency, async (repo) => ({
        repo: repo.slug,
        prs: await fetchUserRepo(ctx.user, repo.slug, ctx.since, ctx.until, callCtx, ctx.logger, fetchOptions),
    }));
    const totalPRs = repoPRs.reduce((sum, r) => sum + r.prs.length, 0);
    ctx.logger.info({ totalPRs, repos: repoPRs.length }, "Digest fetch complete");
    if (!totalPRs) {
        return emptyResult(ctx, callCtx, t0, remaining);
    }
    const tickets = await resolveTickets(ctx, [...collectTicketIds(repoPRs)], callCtx);
    const aggregated = aggregateByTicket(repoPRs, tickets, ctx.logger);
    const estimated = await estimateAll(aggregated, ctx, callCtx);
    const repos = buildRepoResults(aggregated, estimated);
    const userSummary = buildUserSummary(estimated);
    const totalHours = repos.reduce((sum, r) => sum + r.totalHours, 0);
    return {
        login: ctx.user.login,
        // Rounded to one decimal place.
        totalHours: Math.round(totalHours * 10) / 10,
        repos,
        userSummary,
        stats: makeStats(callCtx, t0, remaining),
    };
}
|
|
137
|
+
/**
 * Snapshots run telemetry for one user.
 *
 * @param callCtx - Telemetry context of the run.
 * @param t0 - `Date.now()` value captured when the run started.
 * @param remaining - Rate-limit budget reported by preflight.
 */
const makeStats = (callCtx, t0, remaining) => {
    const { apiCalls, errors } = callCtx;
    const durationMs = Date.now() - t0;
    return { apiCalls, durationMs, rateLimitAtStart: remaining, errors };
};
/**
 * Result for a user with nothing to report: zero hours, no repos, no
 * summary, plus the run telemetry.
 */
const emptyResult = (ctx, callCtx, t0, remaining) => {
    const stats = makeStats(callCtx, t0, remaining);
    return { login: ctx.user.login, totalHours: 0, repos: [], userSummary: [], stats };
};
|
|
150
|
+
/**
 * Gather the distinct ticket IDs referenced by every PR of every repo.
 *
 * @param repoPRs - Items exposing a `prs` array.
 * @returns A `Set` of whatever `extractFromPR` yields for those PRs.
 */
const collectTicketIds = (repoPRs) => {
    // One array of extracted IDs per repo, then flattened a single level.
    const idsPerRepo = repoPRs.map(({ prs }) => prs.flatMap(extractFromPR));
    return new Set(idsPerRepo.flat());
};
|
|
151
|
+
/**
 * Look up the given ticket IDs through the cached Linear client for this user.
 *
 * Failures are never thrown: a missing/failed client or a failed fetch is
 * recorded on `callCtx.errors` and an empty `Map` is returned instead.
 *
 * @param ctx - Digest context; reads `linearClientCache`, `user.linearApiKey` and `logger`.
 * @param ids - Ticket IDs to resolve; an empty list short-circuits to an empty `Map`.
 * @param callCtx - Call context whose `errors` array receives failure messages.
 * @returns The data produced by `fetchTickets`, or an empty `Map` on any failure.
 */
async function resolveTickets(ctx, ids, callCtx) {
    if (!ids.length) {
        return new Map();
    }
    // The cache is keyed by Linear API key; a miss is turned into a rejection
    // so it flows through the same error path as a failed client.
    const pendingClient = ctx.linearClientCache.get(ctx.user.linearApiKey);
    const client = await tryCatch(pendingClient ?? Promise.reject(new Error("linear client not cached")));
    if (!client.ok) {
        callCtx.errors.push(`linear: ${client.error}`);
        return new Map();
    }
    const fetched = await tryCatch(fetchTickets(client.data, ids, ctx.logger));
    if (fetched.ok) {
        return fetched.data;
    }
    callCtx.errors.push(`linear-fetch: ${fetched.error}`);
    return new Map();
}
|
|
169
|
+
/** True when the entry has a truthy `ticket` and its `ticketId` is not the `UNLINKED_ID` sentinel. */
const isResolved = (entry) => Boolean(entry.ticket) && entry.ticketId !== UNLINKED_ID;
|
|
170
|
+
/**
 * Return a copy of `entry` flagged as unmapped: zero hours, low confidence and
 * a fixed reasoning string asking for manual review. The input is not mutated.
 */
const markUnmapped = (entry) => Object.assign({}, entry, {
    estimatedHours: 0,
    confidence: "low",
    reasoning: "Unmapped: Linear ticket not found — requires manual review",
});
|
|
176
|
+
/**
 * Return a copy of `entry` flagged as skipped by the estimator: zero hours,
 * low confidence and a fixed reasoning string. The input is not mutated.
 */
const markSkipped = (entry) => Object.assign({}, entry, {
    estimatedHours: 0,
    confidence: "low",
    reasoning: "AI estimation skipped",
});
|
|
182
|
+
/**
 * Produce the final entry list for a user: resolved entries (estimated,
 * skipped, or untouched on failure) followed by unmapped entries.
 *
 * @param aggregated - Map whose values are arrays of ticket entries.
 * @param ctx - Digest context; reads `skipEstimation`, `languageModel`, `since`, `until`, `logger`.
 * @param callCtx - Call context whose `errors` array receives an `ai-estimate:` message on failure.
 * @returns Entries in the order `[...resolved-derived, ...unmapped]`.
 */
async function estimateAll(aggregated, ctx, callCtx) {
    const allEntries = [...aggregated.values()].flat();
    const [resolved, unresolved] = partition(allEntries, isResolved);
    const unmapped = unresolved.map(markUnmapped);
    if (!resolved.length) {
        return unmapped;
    }
    const withUnmapped = (head) => [...head, ...unmapped];
    if (ctx.skipEstimation) {
        return withUnmapped(resolved.map(markSkipped));
    }
    // Only the first ten characters of each bound are forwarded to the estimator.
    const options = {
        model: ctx.languageModel,
        dateRange: { from: ctx.since.slice(0, 10), to: ctx.until.slice(0, 10) },
    };
    const outcome = await tryCatch(estimateTicketHours(resolved, options, ctx.logger));
    if (outcome.ok) {
        return withUnmapped(outcome.data);
    }
    callCtx.errors.push(`ai-estimate: ${outcome.error}`);
    // NOTE(review): on failure the resolved entries are returned as-is (not
    // passed through markSkipped) — confirm they already carry a numeric
    // `estimatedHours` before downstream summing.
    return withUnmapped(resolved);
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import type { DigestPRData } from "./types.js";
|
|
2
|
+
/**
 * The subset of a GitHub GraphQL `PullRequest` node that this module reads.
 * Treated as the authoritative source for "commits inside a PR": the original
 * `authoredDate` and `committedDate` are preserved even after a rebase.
 */
export interface GraphQLPullRequest {
    number: number;
    title: string;
    state: "OPEN" | "MERGED" | "CLOSED";
    createdAt: string;
    mergedAt: string | null;
    updatedAt: string;
    headRefName: string;
    url: string;
    author: { login: string } | null;
    mergeCommit: { oid: string } | null;
    commits: {
        nodes: {
            commit: {
                oid: string;
                authoredDate: string;
                committedDate: string;
                message: string;
                additions: number;
                deletions: number;
                changedFilesIfAvailable: number | null;
                author: {
                    user: { login: string } | null;
                    email: string | null;
                } | null;
            };
        }[];
    };
}
|
|
42
|
+
/** The subset of a GitHub REST commit that this module reads; used for direct-to-main commits that have no PR. */
export interface RestCommit {
    sha: string;
    commit: {
        author: { name: string; email: string; date: string } | null;
        committer: { name: string; email: string; date: string } | null;
        message: string;
    };
    author: { login: string } | null;
    parents: { sha: string }[];
}
|
|
65
|
+
/** Stable identifier for cross-run dedup. PR-grouped when a PR exists. */
|
|
66
|
+
export declare function buildWorkUnit(repo: string, prNumber: number, directSha?: string): string;
|
|
67
|
+
/** Inclusive on both ends — matches GitHub `since`/`until` semantics. */
|
|
68
|
+
export declare function isWithinWindow(date: string, since: string, until: string): boolean;
|
|
69
|
+
/** Q8: drop closed-without-merge PRs. OPEN and MERGED stay. */
|
|
70
|
+
export declare function shouldIncludePr(state: GraphQLPullRequest["state"]): boolean;
|
|
71
|
+
/**
|
|
72
|
+
* Convert a GraphQL PR into a {@link DigestPRData}, filtering inner commits to
|
|
73
|
+
* the activity window. Returns null if the PR has no commits in window (e.g.
|
|
74
|
+
* stale PR with `updatedAt` in window due to a label change but no new code).
|
|
75
|
+
*/
|
|
76
|
+
export declare function prNodeToDigestPRData(repo: string, pr: GraphQLPullRequest, since: string, until: string): DigestPRData | null;
|
|
77
|
+
/**
|
|
78
|
+
* Convert a single REST direct-to-main commit into a synthetic PRData. Each
|
|
79
|
+
* direct commit is its own work unit (Q4) — no grouping by author or day.
|
|
80
|
+
*/
|
|
81
|
+
export declare function directCommitToDigestPRData(repo: string, commit: RestCommit): DigestPRData;
|
|
82
|
+
/** A REST commit is "direct" iff it's not a merge AND not the squash-output of a PR. */
|
|
83
|
+
export declare function isDirectCommit(commit: RestCommit, prMergeShas: Set<string>): boolean;
|
|
84
|
+
/**
|
|
85
|
+
* Collect all PR mergeCommit SHAs from a list of PR nodes. Used to filter out
|
|
86
|
+
* squash-merge commits from the REST direct-commits arm — those commits
|
|
87
|
+
* represent work already counted via the GraphQL PR arm.
|
|
88
|
+
*/
|
|
89
|
+
export declare function collectMergeShas(prs: GraphQLPullRequest[]): Set<string>;
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
 * Build the stable identifier consumers use for cross-run dedup.
 *
 * @param repo - Repository slug placed at the front of the identifier.
 * @param prNumber - PR number; any value greater than zero selects the PR form.
 * @param directSha - Commit SHA used for the direct-commit form.
 * @returns `"<repo>:pr-<number>"` when `prNumber > 0`, else `"<repo>:direct:<sha>"`.
 */
export function buildWorkUnit(repo, prNumber, directSha) {
    if (prNumber > 0) {
        return `${repo}:pr-${prNumber}`;
    }
    return `${repo}:direct:${directSha}`;
}
|
|
5
|
+
/**
 * Report whether `date` falls inside `[since, until]`, inclusive on both ends
 * (matches GitHub `since`/`until` semantics).
 *
 * The three values are compared as instants, not as text, so equivalent
 * timestamps written differently (`...T12:00:00Z` vs `...T12:00:00.000Z`, or a
 * `-05:00` offset vs `Z`) are ordered correctly. If any value cannot be parsed
 * by `Date.parse`, the function falls back to plain string comparison.
 *
 * @param date - Timestamp to test (ISO-8601).
 * @param since - Window start (ISO-8601), inclusive.
 * @param until - Window end (ISO-8601), inclusive.
 * @returns `true` when `since <= date <= until`.
 */
export function isWithinWindow(date, since, until) {
    const [at, from, to] = [date, since, until].map((iso) => Date.parse(iso));
    if (Number.isNaN(at) || Number.isNaN(from) || Number.isNaN(to)) {
        // Unparseable input: keep the lexicographic comparison as a fallback.
        return date >= since && date <= until;
    }
    return at >= from && at <= to;
}
|
|
9
|
+
/**
 * Q8: decide whether a PR state is kept. `"OPEN"` and `"MERGED"` are kept;
 * every other value (including closed-without-merge) is dropped.
 */
export function shouldIncludePr(state) {
    switch (state) {
        case "OPEN":
        case "MERGED":
            return true;
        default:
            return false;
    }
}
|
|
13
|
+
/**
 * Flatten one GraphQL commit node into the commit record used by the digest.
 *
 * @param node - Object exposing a `commit` with the GraphQL commit fields.
 * @returns `{ sha, message, author, date, additions, deletions, filesChanged }`.
 */
function commitNodeToCommitData(node) {
    const c = node.commit;
    const who = c.author;
    // Prefer the linked login, then the raw email, then a fixed placeholder.
    const author = who?.user?.login ?? who?.email ?? "unknown";
    return {
        sha: c.oid,
        message: c.message,
        author,
        // Q10/B: bucket by activity date — committedDate rather than authoredDate.
        date: c.committedDate,
        additions: c.additions,
        deletions: c.deletions,
        // A null/undefined file count is reported as zero.
        filesChanged: c.changedFilesIfAvailable ?? 0,
    };
}
|
|
26
|
+
/**
 * Convert a GraphQL PR node into digest PR data, keeping only the commits whose
 * `committedDate` lies inside the activity window.
 *
 * @param repo - Repository slug, used for the `workUnit` identifier.
 * @param pr - GraphQL pull-request node.
 * @param since - Window start passed to `isWithinWindow`.
 * @param until - Window end passed to `isWithinWindow`.
 * @returns The digest record, or `null` when the PR state is excluded by
 *   `shouldIncludePr` or when no commit falls inside the window (e.g. a stale
 *   PR whose `updatedAt` moved because of a label change but with no new code).
 */
export function prNodeToDigestPRData(repo, pr, since, until) {
    if (!shouldIncludePr(pr.state)) {
        return null;
    }
    const commits = [];
    let additions = 0;
    let deletions = 0;
    let filesChanged = 0;
    for (const node of pr.commits.nodes) {
        if (!isWithinWindow(node.commit.committedDate, since, until)) {
            continue;
        }
        const data = commitNodeToCommitData(node);
        commits.push(data);
        // Totals are per-commit sums over the in-window commits only.
        additions += data.additions;
        deletions += data.deletions;
        filesChanged += data.filesChanged;
    }
    if (commits.length === 0) {
        return null;
    }
    return {
        number: pr.number,
        title: pr.title,
        author: pr.author?.login ?? "unknown",
        branchName: pr.headRefName,
        additions,
        deletions,
        filesChanged,
        commits,
        mergedAt: pr.mergedAt,
        createdAt: pr.createdAt,
        url: pr.url,
        workUnit: buildWorkUnit(repo, pr.number),
    };
}
|
|
59
|
+
/**
 * Wrap a single REST direct-to-main commit in a synthetic PR-shaped record.
 * Each direct commit is its own work unit (Q4) — nothing is grouped by author
 * or by day.
 *
 * @param repo - Repository slug, used in the commit URL and the `workUnit`.
 * @param commit - REST commit object.
 * @returns A record with `number: 0`, an empty `branchName`, `mergedAt: null`
 *   and exactly one entry in `commits`.
 */
export function directCommitToDigestPRData(repo, commit) {
    const { sha, commit: details } = commit;
    const { message } = details;
    // The title is everything before the first newline of the message.
    const [titleLine] = message.split("\n");
    // Q10/B: bucket on committer.date; author.date is only a fallback.
    const committedDate = details.committer?.date ?? details.author?.date ?? "";
    const authorLogin = commit.author?.login ?? details.author?.email ?? "unknown";
    // REST list endpoint omits stats; populated separately if a per-commit call
    // is made. Zero is safe — heuristic estimator handles it.
    const noStats = { additions: 0, deletions: 0, filesChanged: 0 };
    return {
        number: 0,
        title: titleLine,
        author: authorLogin,
        branchName: "",
        ...noStats,
        commits: [{ sha, message, author: authorLogin, date: committedDate, ...noStats }],
        mergedAt: null,
        createdAt: committedDate,
        url: `https://github.com/${repo}/commit/${sha}`,
        workUnit: buildWorkUnit(repo, 0, sha),
    };
}
|
|
95
|
+
/**
 * A REST commit counts as "direct" only when it is not a merge commit (it has
 * at most one parent) and its SHA is not one of the known PR merge commits.
 *
 * @param commit - REST commit; reads `parents.length` and `sha`.
 * @param prMergeShas - Set of PR merge-commit SHAs.
 */
export function isDirectCommit(commit, prMergeShas) {
    const hasSingleLineage = !(commit.parents.length > 1);
    return hasSingleLineage && !prMergeShas.has(commit.sha);
}
|
|
101
|
+
/**
 * Gather the merge-commit SHA of every PR node that has one. The result is
 * used to keep squash-merge commits out of the REST direct-commits arm, since
 * that work is already counted through the GraphQL PR arm.
 *
 * @param prs - PR nodes; `mergeCommit` may be null.
 * @returns Set of the truthy `mergeCommit.oid` values.
 */
export function collectMergeShas(prs) {
    const shas = new Set();
    for (const pr of prs) {
        const oid = pr.mergeCommit?.oid;
        if (oid) {
            shas.add(oid);
        }
    }
    return shas;
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public types for {@link dailyDigest} — a stateless, fail-safe per-user worker
|
|
3
|
+
* that reads work-attribution data from the GitHub API for a date window and
|
|
4
|
+
* produces senku-shaped output per user.
|
|
5
|
+
*
|
|
6
|
+
* Design tradeoffs locked in this module:
|
|
7
|
+
* - **Stateless + fingerprint**: every call is pure; consumers dedup across
|
|
8
|
+
* runs via the per-PR `workUnit` field on the emitted {@link DigestPRData}.
|
|
9
|
+
* - **PR-grouped workUnit**: all commits within one GitHub PR share a
|
|
10
|
+
* workUnit, so squash-merges across runs collapse to one logical unit.
|
|
11
|
+
* - **Per-user output**: errors isolate to one user; other users' results are
|
|
12
|
+
* unaffected. Consumers map multiple Users to one person if needed.
|
|
13
|
+
* - **OPEN + MERGED only**: closed-without-merge PRs are excluded upstream.
|
|
14
|
+
* - **Window applies to activity** (committerDate / PR updatedAt). Cherry-
|
|
15
|
+
* picks and rebased commits bucket on the day they landed, not the day
|
|
16
|
+
* they were originally written.
|
|
17
|
+
*/
|
|
18
|
+
import type pino from "pino";
|
|
19
|
+
import type { PRData, RepoResult, UserSummary } from "../types.js";
|
|
20
|
+
/** A repository to scan, identified by `"owner/repo"`. */
|
|
21
|
+
export interface DigestRepo {
|
|
22
|
+
slug: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Sentinel for commit/PR author when the GitHub API returns no login and no
|
|
26
|
+
* email. Typed as a const so narrowing works on equality checks.
|
|
27
|
+
*/
|
|
28
|
+
export declare const UNKNOWN_AUTHOR: "unknown";
|
|
29
|
+
/**
|
|
30
|
+
* One identity-in-one-org. GitHub and Linear are both org-scoped, so they
|
|
31
|
+
* travel together inside a User entry. One human appears as N Users if they
|
|
32
|
+
* work across N orgs.
|
|
33
|
+
*/
|
|
34
|
+
export interface DigestUser {
|
|
35
|
+
/** GitHub login (e.g. `"hrithikbluelabel"`). */
|
|
36
|
+
login: string;
|
|
37
|
+
/**
|
|
38
|
+
* GitHub PAT with `repo` (classic) or `Contents: read` + `Pull requests: read`
|
|
39
|
+
* (fine-grained). Used as the bearer for REST + GraphQL.
|
|
40
|
+
*/
|
|
41
|
+
githubToken: string;
|
|
42
|
+
/** Linear workspace key (`lin_api_...`). Resolves ticket IDs to titles/states. */
|
|
43
|
+
linearApiKey: string;
|
|
44
|
+
/** Repos to scan for this user. */
|
|
45
|
+
repos: DigestRepo[];
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Parameters for {@link dailyDigest}.
|
|
49
|
+
*/
|
|
50
|
+
export interface DigestParams {
|
|
51
|
+
users: DigestUser[];
|
|
52
|
+
/**
|
|
53
|
+
* OpenAI API key. Optional when `skipEstimation` is `true`.
|
|
54
|
+
* Cost note: the default model (`gpt-4o`) is called once per resolved ticket;
|
|
55
|
+
* large orgs can accumulate meaningful spend — use `gpt-4o-mini` or
|
|
56
|
+
* `skipEstimation: true` for zero-cost dry runs.
|
|
57
|
+
*/
|
|
58
|
+
openaiApiKey?: string;
|
|
59
|
+
/** ISO-8601 window start (inclusive). */
|
|
60
|
+
since: string;
|
|
61
|
+
/** ISO-8601 window end (inclusive). */
|
|
62
|
+
until: string;
|
|
63
|
+
/**
|
|
64
|
+
* Override the OpenAI model. Default: `"gpt-4o"`.
|
|
65
|
+
* `gpt-4o-mini` is cheaper with similar accuracy for hour estimation.
|
|
66
|
+
*/
|
|
67
|
+
model?: string;
|
|
68
|
+
/** Label that flows into DigestResult.org. Default: `"daily-digest"`. */
|
|
69
|
+
org?: string;
|
|
70
|
+
/** Optional pino logger for diagnostics. Default: a silent logger. */
|
|
71
|
+
logger?: pino.Logger;
|
|
72
|
+
/**
|
|
73
|
+
* When `true`, skip OpenAI estimation entirely. All resolved entries receive
|
|
74
|
+
* `estimatedHours: 0, confidence: "low"`. Makes `openaiApiKey` optional.
|
|
75
|
+
* Useful for CI dry-runs or rate-limit-sensitive contexts.
|
|
76
|
+
*/
|
|
77
|
+
skipEstimation?: boolean;
|
|
78
|
+
/**
|
|
79
|
+
* Max repos fetched in parallel per user. Default: 4.
|
|
80
|
+
* Raise cautiously — GitHub secondary rate limits penalise burst parallelism.
|
|
81
|
+
*/
|
|
82
|
+
repoConcurrency?: number;
|
|
83
|
+
/**
|
|
84
|
+
* Max users processed in parallel. Default: 5.
|
|
85
|
+
* Each user makes N*repo API calls; keep conservative in large orgs.
|
|
86
|
+
*/
|
|
87
|
+
userConcurrency?: number;
|
|
88
|
+
/**
|
|
89
|
+
* When `true`, pass through to fetchUserRepo to populate direct-commit stats.
|
|
90
|
+
* Default: false.
|
|
91
|
+
*/
|
|
92
|
+
populateDirectCommitStats?: boolean;
|
|
93
|
+
}
|
|
94
|
+
/** Per-user telemetry — useful for cost tracking + debugging in consumers. */
|
|
95
|
+
export interface DigestStats {
|
|
96
|
+
apiCalls: number;
|
|
97
|
+
durationMs: number;
|
|
98
|
+
/**
|
|
99
|
+
* GitHub API rate-limit remaining calls at the time of preflight.
|
|
100
|
+
* Reflects the budget at the start of the run, not after it completes.
|
|
101
|
+
*/
|
|
102
|
+
rateLimitAtStart: number;
|
|
103
|
+
/** Non-fatal errors collected during the run (bad repo, Linear failure, etc.). */
|
|
104
|
+
errors: string[];
|
|
105
|
+
}
|
|
106
|
+
/** Per-user result. Shared fields (`org`, `generatedAt`, `dateRange`) live on {@link DigestResult}. */
|
|
107
|
+
export interface DigestUserResult {
|
|
108
|
+
login: string;
|
|
109
|
+
totalHours: number;
|
|
110
|
+
repos: RepoResult[];
|
|
111
|
+
userSummary: UserSummary[];
|
|
112
|
+
stats: DigestStats;
|
|
113
|
+
}
|
|
114
|
+
/** Top-level return value from {@link dailyDigest}. */
|
|
115
|
+
export interface DigestResult {
|
|
116
|
+
org: string;
|
|
117
|
+
generatedAt: string;
|
|
118
|
+
dateRange: {
|
|
119
|
+
from: string;
|
|
120
|
+
to: string;
|
|
121
|
+
};
|
|
122
|
+
users: DigestUserResult[];
|
|
123
|
+
}
|
|
124
|
+
/**
|
|
125
|
+
* PRData extended with the dedup fingerprint. The base {@link PRData} flows
|
|
126
|
+
* unmodified through senku's existing pipeline; `workUnit` is populated only
|
|
127
|
+
* on digest-emitted PRs.
|
|
128
|
+
*
|
|
129
|
+
* Format:
|
|
130
|
+
* - PR-derived: `"<owner>/<repo>:pr-<number>"`
|
|
131
|
+
* - Direct commit: `"<owner>/<repo>:direct:<sha>"`
|
|
132
|
+
*/
|
|
133
|
+
export interface DigestPRData extends PRData {
|
|
134
|
+
workUnit: string;
|
|
135
|
+
}
|
package/dist/lib.d.ts
CHANGED
|
@@ -16,6 +16,8 @@ export type { AnalyzeDeps } from "./analyze.js";
|
|
|
16
16
|
export { analyze } from "./analyze.js";
|
|
17
17
|
export { discoverRepos } from "./cli/discover.js";
|
|
18
18
|
export { cloneRepo } from "./clone.js";
|
|
19
|
+
export { dailyDigest, dailyDigest as digest } from "./digest/index.js";
|
|
20
|
+
export type { DigestParams, DigestPRData, DigestRepo, DigestResult, DigestStats, DigestUser, DigestUserResult, } from "./digest/types.js";
|
|
19
21
|
export { assertGitAvailable, runGit } from "./exec.js";
|
|
20
22
|
export type { GitHubUser } from "./github-user.js";
|
|
21
23
|
export { fetchGitHubUser } from "./github-user.js";
|
package/dist/lib.js
CHANGED
|
@@ -15,6 +15,7 @@ export { estimateTicketHours } from "./ai/summarizer.js";
|
|
|
15
15
|
export { analyze } from "./analyze.js";
|
|
16
16
|
export { discoverRepos } from "./cli/discover.js";
|
|
17
17
|
export { cloneRepo } from "./clone.js";
|
|
18
|
+
export { dailyDigest, dailyDigest as digest } from "./digest/index.js";
|
|
18
19
|
export { assertGitAvailable, runGit } from "./exec.js";
|
|
19
20
|
export { fetchGitHubUser } from "./github-user.js";
|
|
20
21
|
export { createLinearClient } from "./linear/client.js";
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { RepoResult, TicketEntry, UserSummary } from "../types.js";
|
|
2
|
+
/** Build per-repo {@link RepoResult} list from aggregation keys + estimated entries. */
|
|
3
|
+
export declare function buildRepoResults(aggregated: Map<string, TicketEntry[]>, estimated: TicketEntry[]): RepoResult[];
|
|
4
|
+
/** Build per-user hour summary by splitting each ticket's hours equally across contributors. */
|
|
5
|
+
export declare function buildUserSummary(estimated: TicketEntry[]): UserSummary[];
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Build one result per key of `aggregated`, in key order.
 *
 * @param aggregated - Map whose keys are repo names; the values are not read here.
 * @param estimated - Estimated entries; matched to a repo by `entry.repo === repoName`.
 * @returns `{ repo, totalHours, tickets }` per repo, with `totalHours` rounded to one decimal.
 */
export function buildRepoResults(aggregated, estimated) {
    const results = [];
    for (const repoName of aggregated.keys()) {
        const tickets = estimated.filter((entry) => entry.repo === repoName);
        let hours = 0;
        for (const ticket of tickets) {
            hours += ticket.estimatedHours;
        }
        results.push({ repo: repoName, totalHours: Math.round(hours * 10) / 10, tickets });
    }
    return results;
}
|
|
9
|
+
/**
 * Build the per-user hour summary. Each entry's `estimatedHours` is divided
 * equally among its `users`; an entry with no users contributes nothing.
 *
 * @param estimated - Entries exposing `users`, `estimatedHours` and `ticketId`.
 * @returns `{ user, totalHours, ticketCount }` per user in first-seen order,
 *   with `totalHours` rounded to one decimal and `ticketCount` counting
 *   distinct ticket IDs.
 */
export function buildUserSummary(estimated) {
    const byUser = new Map();
    for (const entry of estimated) {
        const share = entry.users.length ? entry.estimatedHours / entry.users.length : 0;
        for (const user of entry.users) {
            const tally = byUser.get(user) ?? { hours: 0, tickets: new Set() };
            tally.hours += share;
            tally.tickets.add(entry.ticketId);
            byUser.set(user, tally);
        }
    }
    const summary = [];
    for (const [user, tally] of byUser) {
        summary.push({
            user,
            totalHours: Math.round(tally.hours * 10) / 10,
            ticketCount: tally.tickets.size,
        });
    }
    return summary;
}
|
package/dist/pipeline.js
CHANGED
|
@@ -3,6 +3,7 @@ import { aggregateByTicket } from "./aggregator.js";
|
|
|
3
3
|
import { estimateTicketHours } from "./ai/summarizer.js";
|
|
4
4
|
import { createLinearClient } from "./linear/client.js";
|
|
5
5
|
import { fetchTickets } from "./linear/tickets.js";
|
|
6
|
+
import { buildRepoResults, buildUserSummary } from "./output/summary.js";
|
|
6
7
|
import { extractFromPR } from "./parser/ticket-extractor.js";
|
|
7
8
|
import { getRepoName, scrapeRepo } from "./scrape.js";
|
|
8
9
|
import { UNLINKED_ID } from "./types.js";
|
|
@@ -60,34 +61,6 @@ async function runAiEstimation(resolvedEntries, unmappedWithFlag, input, dateRan
|
|
|
60
61
|
progress(`AI estimation complete for ${aiEstimated.length} tickets`);
|
|
61
62
|
return [...aiEstimated, ...unmappedWithFlag];
|
|
62
63
|
}
|
|
63
|
-
function buildRepoResults(aggregated, estimated) {
|
|
64
|
-
return [...aggregated.keys()].map((repoName) => {
|
|
65
|
-
const repoTickets = estimated.filter((e) => e.repo === repoName);
|
|
66
|
-
const totalHours = repoTickets.reduce((sum, t) => sum + t.estimatedHours, 0);
|
|
67
|
-
return { repo: repoName, totalHours: Math.round(totalHours * 10) / 10, tickets: repoTickets };
|
|
68
|
-
});
|
|
69
|
-
}
|
|
70
|
-
function buildUserSummary(estimated) {
|
|
71
|
-
const userHoursMap = new Map();
|
|
72
|
-
for (const entry of estimated) {
|
|
73
|
-
const perUserHours = entry.estimatedHours / entry.users.length;
|
|
74
|
-
for (const user of entry.users) {
|
|
75
|
-
const existing = userHoursMap.get(user);
|
|
76
|
-
if (existing) {
|
|
77
|
-
existing.hours += perUserHours;
|
|
78
|
-
existing.tickets.add(entry.ticketId);
|
|
79
|
-
}
|
|
80
|
-
else {
|
|
81
|
-
userHoursMap.set(user, { hours: perUserHours, tickets: new Set([entry.ticketId]) });
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
return [...userHoursMap.entries()].map(([user, data]) => ({
|
|
86
|
-
user,
|
|
87
|
-
totalHours: Math.round(data.hours * 10) / 10,
|
|
88
|
-
ticketCount: data.tickets.size,
|
|
89
|
-
}));
|
|
90
|
-
}
|
|
91
64
|
export function defaultLanguageModel(apiKey, modelName) {
|
|
92
65
|
return createOpenAI({ apiKey })(modelName);
|
|
93
66
|
}
|
package/dist/utils.d.ts
CHANGED
|
@@ -13,4 +13,9 @@ export declare function formatDate(date: Date): string;
|
|
|
13
13
|
export declare function expandHome(path: string): string;
|
|
14
14
|
export declare function batchConcurrent<T, R>(items: T[], concurrency: number, fn: (item: T) => Promise<R>): Promise<R[]>;
|
|
15
15
|
export declare function sanitizeFilename(name: string): string;
|
|
16
|
+
/**
|
|
17
|
+
* Split `items` into two arrays in a single pass.
|
|
18
|
+
* Returns `[matching, nonMatching]`.
|
|
19
|
+
*/
|
|
20
|
+
export declare function partition<T>(items: T[], predicate: (item: T) => boolean): [T[], T[]];
|
|
16
21
|
export {};
|
package/dist/utils.js
CHANGED
|
@@ -42,3 +42,19 @@ export function sanitizeFilename(name) {
|
|
|
42
42
|
.slice(0, 100);
|
|
43
43
|
return sanitized.length > 0 ? sanitized : "unnamed";
|
|
44
44
|
}
|
|
45
|
+
/**
 * Split `items` into two arrays in a single pass, preserving input order.
 *
 * @param items - Values to split.
 * @param predicate - Called once per item with the item as its only argument.
 * @returns `[matching, nonMatching]`.
 */
export function partition(items, predicate) {
    const matching = [];
    const nonMatching = [];
    for (const item of items) {
        const bucket = predicate(item) ? matching : nonMatching;
        bucket.push(item);
    }
    return [matching, nonMatching];
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@robotostudio/senku",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Estimate dev hours per Linear ticket from git history + AI. A stateless library plus an interactive CLI; runs on Node and on Trigger.dev.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"./package.json": "./package.json"
|
|
15
15
|
},
|
|
16
16
|
"bin": {
|
|
17
|
-
"senku": "
|
|
17
|
+
"senku": "dist/index.js"
|
|
18
18
|
},
|
|
19
19
|
"files": [
|
|
20
20
|
"dist",
|