revund-ruby-worker 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +190 -0
- data/README.md +60 -0
- data/bin/revund-ruby-worker +17 -0
- data/lib/ruby_worker/fetcher.rb +147 -0
- data/lib/ruby_worker/parser.rb +663 -0
- data/lib/ruby_worker/server.rb +36 -0
- data/lib/ruby_worker/service.rb +114 -0
- data/lib/ruby_worker/version.rb +5 -0
- data/proto/worker/v1/worker.proto +480 -0
- metadata +117 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'parser'
|
|
4
|
+
require_relative 'fetcher'
|
|
5
|
+
# The stubs required below are generated by grpc_tools_ruby_protoc from
|
|
6
|
+
# /proto/worker/v1/worker.proto. The codegen script lives at
|
|
7
|
+
# scripts/gen-proto.sh (a follow-up). Uncomment once generated:
|
|
8
|
+
#
|
|
9
|
+
# require 'worker/v1/worker_pb'
|
|
10
|
+
# require 'worker/v1/worker_services_pb'
|
|
11
|
+
|
|
12
|
+
module RubyWorker
|
|
13
|
+
# Service implements the universal `revund.worker.v1.Worker`
|
|
14
|
+
# contract — the same contract ts-worker and php-worker speak.
|
|
15
|
+
#
|
|
16
|
+
# Each handler is thin — it translates between the gRPC wire
|
|
17
|
+
# shape and the Parser domain object. The handler bodies are
|
|
18
|
+
# written against the assumed generated message shape so
|
|
19
|
+
# swapping the stub in is mechanical.
|
|
20
|
+
class Service
  # include ::Revund::Worker::V1::Worker::Service

  # Identity reported by Describe.
  NAME = 'ruby-worker'
  LANGUAGES = ['ruby'].freeze
  CAPABILITIES = %w[parse self_fetch].freeze

  # AUTH_HEADER mirrors the Go-side constant in
  # core/pkg/worker/auth.go. The bot stamps it on every
  # outbound RPC; this worker rejects requests without a
  # matching value when REVUND_WORKER_SECRET is configured.
  AUTH_HEADER = 'x-revund-worker-token'
  AUTH_SECRET_ENV = 'REVUND_WORKER_SECRET'

  def initialize
    @parser = RubyWorker::Parser.new
  end

  # Describe — self-identifies. The bot calls this on first
  # connect to learn what languages + capabilities this worker
  # advertises.
  #
  # Returns a DescribeResponse, or nil while the generated
  # protobuf stubs are not loaded. Raises GRPC::Unauthenticated
  # when a secret is configured and the caller's token does not
  # match.
  def describe(_request, call)
    return unauthenticated!(call) unless authorized?(call)

    response_class = message_class(:DescribeResponse)
    return nil if response_class.nil?

    response_class.new(
      name: NAME,
      version: Server::VERSION,
      languages: LANGUAGES,
      capabilities: CAPABILITIES
    )
  end

  # Health — liveness probe. Returns a HealthResponse carrying
  # the worker version, or nil while the generated stubs are not
  # loaded. Raises GRPC::Unauthenticated on a token mismatch.
  def health(_request, call)
    return unauthenticated!(call) unless authorized?(call)

    response_class = message_class(:HealthResponse)
    return nil if response_class.nil?

    response_class.new(version: Server::VERSION)
  end

  # Parse — resolves the repo root (shared path or self-fetched
  # checkout), hands request.files to the Parser, and wraps the
  # result in a ParseResponse. Returns nil while the generated
  # stubs are not loaded. Raises GRPC::Unauthenticated on a
  # token mismatch.
  def parse(request, call)
    return unauthenticated!(call) unless authorized?(call)

    response_class = message_class(:ParseResponse)
    return nil if response_class.nil?

    repo_path = resolve_repo_path(request)
    parsed = @parser.parse_files(repo_path, request.files.to_a)
    response_class.new(files: parsed)
  end

  private

  # Looks up a generated message class under ::Revund::Worker::V1
  # without raising. Returns nil when the namespace or the
  # constant is missing (i.e. the stubs have not been generated
  # and required yet). Unlike an inline `rescue nil`, this does
  # not hide unrelated errors.
  def message_class(name)
    return nil unless defined?(::Revund::Worker::V1)

    namespace = ::Revund::Worker::V1
    namespace.const_defined?(name, false) ? namespace.const_get(name, false) : nil
  end

  # Validate the shared-secret header against
  # REVUND_WORKER_SECRET. Empty / unset secret = no enforcement
  # (CLI / local-dev default). Returns true on success or
  # "no enforcement", false otherwise.
  def authorized?(call)
    expected = ENV[AUTH_SECRET_ENV].to_s
    return true if expected.empty?

    md = call.respond_to?(:metadata) ? (call.metadata || {}) : {}
    # Metadata values may arrive as a single string or a list;
    # only the first value is considered.
    presented = Array(md[AUTH_HEADER]).first.to_s
    constant_time_equal?(presented, expected)
  end

  # Byte-wise comparison that always walks the full length, so
  # the time taken does not reveal how many leading bytes of the
  # presented token were correct. Length mismatch returns false
  # immediately (only the length is observable).
  def constant_time_equal?(left, right)
    return false unless left.bytesize == right.bytesize

    left.bytes.zip(right.bytes).reduce(0) { |acc, (a, b)| acc | (a ^ b) }.zero?
  end

  def unauthenticated!(_call)
    raise GRPC::Unauthenticated, 'missing or invalid x-revund-worker-token'
  end

  # Two dispatch modes:
  #   - shared-FS path mode: returns request.repo_path verbatim
  #   - self-fetch mode: hands the RepoSource to Fetcher, returns
  #     whatever Fetcher.fetch_or_cache returns (the local cached
  #     checkout path).
  # Self-fetch is chosen only when the request carries a
  # repo_source with a non-empty url.
  def resolve_repo_path(request)
    src = request.respond_to?(:repo_source) ? request.repo_source : nil
    return request.repo_path if src.nil? || src.url.to_s.empty?

    Fetcher.fetch_or_cache(
      url: src.url,
      ref: src.ref,
      auth_token: src.auth_token,
      # Older generated stubs may predate the auth_user field.
      auth_user: src.respond_to?(:auth_user) ? src.auth_user : ''
    )
  end

  # ResolveSymbols and RunDiagnostics are intentionally not
  # implemented. The bot's caller checks the `capabilities`
  # list from Describe and skips RPCs the worker hasn't
  # advertised — so unimplemented = silently skipped.
end
|
|
114
|
+
end
|
|
@@ -0,0 +1,480 @@
|
|
|
1
|
+
// worker.proto — the universal AST-worker contract.
|
|
2
|
+
//
|
|
3
|
+
// # Public API
|
|
4
|
+
//
|
|
5
|
+
// This file defines the wire contract every Revund AST
|
|
6
|
+
// sidecar speaks. It is designed for STABILITY since the
|
|
7
|
+
// contract may eventually be published as an open-source
|
|
8
|
+
// interface that third-party language sidecars target.
|
|
9
|
+
//
|
|
10
|
+
// Compatibility rules:
|
|
11
|
+
//
|
|
12
|
+
// - Adding fields to existing messages: ALLOWED (proto3
|
|
13
|
+
// ignores unknown fields gracefully).
|
|
14
|
+
// - Adding new RPCs: ALLOWED (clients negotiate capability
|
|
15
|
+
// via Describe).
|
|
16
|
+
// - Removing or renaming fields: BREAKING — bump the
|
|
17
|
+
// package version (v1 → v2). Old clients keep speaking v1.
|
|
18
|
+
// - Changing field types: BREAKING.
|
|
19
|
+
//
|
|
20
|
+
// # Sidecar contract
|
|
21
|
+
//
|
|
22
|
+
// A Revund worker is any process implementing this service.
|
|
23
|
+
// It can be:
|
|
24
|
+
//
|
|
25
|
+
// - A first-party reference implementation we maintain
|
|
26
|
+
// (ts-worker, php-worker, ruby-worker)
|
|
27
|
+
// - A community-built sidecar for any language
|
|
28
|
+
// - A proprietary worker a customer builds for their
|
|
29
|
+
// in-house DSL
|
|
30
|
+
//
|
|
31
|
+
// The bot dials the worker by host:port, calls `Describe`
|
|
32
|
+
// to learn which languages + capabilities it advertises,
|
|
33
|
+
// and routes Parse RPCs based on the response. The bot
|
|
34
|
+
// does NOT hardcode language-to-worker mappings — every
|
|
35
|
+
// worker self-identifies.
|
|
36
|
+
//
|
|
37
|
+
// # Minimum viable worker
|
|
38
|
+
//
|
|
39
|
+
// Implement only `Describe`, `Health`, and `Parse`. Set
|
|
40
|
+
// `capabilities = ["parse"]`. The bot uses that worker for
|
|
41
|
+
// its advertised languages and skips the rest gracefully.
|
|
42
|
+
//
|
|
43
|
+
// # Symbol resolution / diagnostics (optional)
|
|
44
|
+
//
|
|
45
|
+
// Capabilities are advisory. Workers that ALSO support
|
|
46
|
+
// symbol resolution (returning declarations of identifiers
|
|
47
|
+
// referenced in changed code) advertise
|
|
48
|
+
// `capabilities = ["parse", "resolve_symbols"]`. Workers
|
|
49
|
+
// that run language-specific type-checkers (TypeScript's
|
|
50
|
+
// `tsc --noEmit`, PHP's PHPStan, Ruby's Sorbet) advertise
|
|
51
|
+
// `["parse", "diagnostics"]`. The bot uses these features
|
|
52
|
+
// when present, falls back to "just parse" when not.
|
|
53
|
+
|
|
54
|
+
syntax = "proto3";
|
|
55
|
+
|
|
56
|
+
package revund.worker.v1;
|
|
57
|
+
|
|
58
|
+
option go_package = "github.com/revund-dev/revund/core/pkg/worker/proto/worker/v1;workerpb";
|
|
59
|
+
|
|
60
|
+
service Worker {
|
|
61
|
+
// Describe identifies the worker. The bot calls this
|
|
62
|
+
// first to learn what languages + capabilities it
|
|
63
|
+
// supports. Idempotent, side-effect-free.
|
|
64
|
+
rpc Describe(DescribeRequest) returns (DescribeResponse);
|
|
65
|
+
|
|
66
|
+
// Health is a standard k8s-style liveness probe.
|
|
67
|
+
rpc Health(HealthRequest) returns (HealthResponse);
|
|
68
|
+
|
|
69
|
+
// Parse returns a minimal AST view (imports + decls +
|
|
70
|
+
// functions + concerns) for the requested files. Per-
|
|
71
|
+
// file parse errors are returned INSIDE the response,
|
|
72
|
+
// never as RPC errors. REQUIRED capability — every
|
|
73
|
+
// healthy worker implements this.
|
|
74
|
+
rpc Parse(ParseRequest) returns (ParseResponse);
|
|
75
|
+
|
|
76
|
+
// ResolveSymbols returns the declarations of identifiers
|
|
77
|
+
// referenced in a diff but defined elsewhere in the
|
|
78
|
+
// repo. Used by the bot's ingest layer to enrich the
|
|
79
|
+
// LLM bundle with cross-file type information.
|
|
80
|
+
//
|
|
81
|
+
// OPTIONAL capability — workers advertise
|
|
82
|
+
// "resolve_symbols" in Describe.Capabilities when
|
|
83
|
+
// implemented. Workers that don't implement it return
|
|
84
|
+
// UNIMPLEMENTED; the bot's caller checks the capability
|
|
85
|
+
// list and skips the call when unsupported.
|
|
86
|
+
rpc ResolveSymbols(ResolveRequest) returns (ResolveResponse);
|
|
87
|
+
|
|
88
|
+
// RunDiagnostics runs the language's native type-checker
|
|
89
|
+
// (tsc --noEmit for TypeScript, PHPStan for PHP, Sorbet
|
|
90
|
+
// for Ruby, etc.) and returns errors touching the
|
|
91
|
+
// changed files. Empty when the project has no errors
|
|
92
|
+
// or the worker doesn't run a type-checker — never an
|
|
93
|
+
// RPC error.
|
|
94
|
+
//
|
|
95
|
+
// OPTIONAL capability — workers advertise "diagnostics"
|
|
96
|
+
// in Describe.Capabilities when implemented.
|
|
97
|
+
rpc RunDiagnostics(DiagnosticsRequest) returns (DiagnosticsResponse);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// ─────────────────────────────────────────────────────────
|
|
101
|
+
// Describe — self-identification
|
|
102
|
+
// ─────────────────────────────────────────────────────────
|
|
103
|
+
|
|
104
|
+
// DescribeRequest is intentionally empty: Describe takes no parameters.
message DescribeRequest {}
|
|
105
|
+
|
|
106
|
+
message DescribeResponse {
|
|
107
|
+
// Name is a human-readable identifier for this worker,
|
|
108
|
+
// e.g. "ts-worker", "php-worker", "ruby-worker", or a
|
|
109
|
+
// community-chosen name like "my-fancy-go-worker".
|
|
110
|
+
// Used for logging and observability; NOT used for
|
|
111
|
+
// routing (languages field does that).
|
|
112
|
+
string name = 1;
|
|
113
|
+
|
|
114
|
+
// Version is the worker's semantic version. Logged at
|
|
115
|
+
// startup and surfaced in debug bundles so we can tie
|
|
116
|
+
// findings to a specific worker build.
|
|
117
|
+
string version = 2;
|
|
118
|
+
|
|
119
|
+
// Languages the worker can parse. Lowercase canonical
|
|
120
|
+
// names matching the lang.Language enum on the Go side:
|
|
121
|
+
// "typescript", "javascript", "go", "php", "ruby",
|
|
122
|
+
// "python", "rust", "java", "kotlin", "swift", "csharp".
|
|
123
|
+
//
|
|
124
|
+
// A worker MAY advertise multiple languages (a tree-
|
|
125
|
+
// sitter-based worker might handle ten). The bot routes
|
|
126
|
+
// each language's files to the worker that advertises
|
|
127
|
+
// it; on conflict (multiple workers advertise the same
|
|
128
|
+
// language), the bot picks the first one registered.
|
|
129
|
+
repeated string languages = 3;
|
|
130
|
+
|
|
131
|
+
// Capabilities is the set of features this worker
|
|
132
|
+
// implements: the required "parse" plus any optional ones.
|
|
133
|
+
// Known values:
|
|
134
|
+
//
|
|
135
|
+
// "parse" — Parse RPC (REQUIRED; always
|
|
136
|
+
// present in a healthy worker)
|
|
137
|
+
// "resolve_symbols" — symbol-decl resolution; the
|
|
138
|
+
// worker can return declarations
|
|
139
|
+
// of identifiers referenced in
|
|
140
|
+
// changed code.
|
|
141
|
+
// "diagnostics" — language-specific type-check
|
|
142
|
+
// diagnostics (tsc, PHPStan,
|
|
143
|
+
// Sorbet, etc.).
|
|
144
|
+
// "self_fetch" — worker can clone the repo
|
|
145
|
+
// itself given a RepoSource
|
|
146
|
+
// (url + ref + auth_token) on
|
|
147
|
+
// the request. Required for
|
|
148
|
+
// cross-host deployments where
|
|
149
|
+
// the bot and worker do NOT
|
|
150
|
+
// share a filesystem. When this
|
|
151
|
+
// capability is advertised, the
|
|
152
|
+
// bot sends `repo_source`
|
|
153
|
+
// instead of `repo_path` on
|
|
154
|
+
// Parse / ResolveSymbols /
|
|
155
|
+
// RunDiagnostics RPCs.
|
|
156
|
+
//
|
|
157
|
+
// New capabilities can be added without breaking older
|
|
158
|
+
// bots — they advertise but the bot ignores capabilities
|
|
159
|
+
// it doesn't recognize.
|
|
160
|
+
repeated string capabilities = 4;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// RepoSource tells a self-fetching worker how to obtain
|
|
164
|
+
// the repo without depending on a shared filesystem with
|
|
165
|
+
// the bot. The worker shallow-clones via git using these
|
|
166
|
+
// values, caches the clone for the review's other RPCs,
|
|
167
|
+
// and evicts on idle.
|
|
168
|
+
//
|
|
169
|
+
// Capability gate: workers MUST advertise "self_fetch"
|
|
170
|
+
// in Describe.Capabilities before the bot will send
|
|
171
|
+
// `repo_source`. Workers without that capability ignore
|
|
172
|
+
// the field entirely and use `repo_path`.
|
|
173
|
+
//
|
|
174
|
+
// # Security expectations on workers
|
|
175
|
+
//
|
|
176
|
+
// - Use auth_token ONLY at clone time (as the password
|
|
177
|
+
// in https://<auth_user>:<auth_token>@host/path.git).
|
|
178
|
+
// - Immediately after a successful clone, strip the
|
|
179
|
+
// token from the remote URL stored in .git/config.
|
|
180
|
+
// - Never log the token. Sanitize error messages that
|
|
181
|
+
// might include the URL.
|
|
182
|
+
// - Never persist the token to disk in any form.
|
|
183
|
+
//
|
|
184
|
+
// Tokens are short-lived (typically 1 hour for GitHub
|
|
185
|
+
// installation tokens) and scoped to the source repo's
|
|
186
|
+
// permissions, so blast radius is bounded — but the
|
|
187
|
+
// hygiene rules above still apply.
|
|
188
|
+
message RepoSource {
|
|
189
|
+
// url is the https clone URL, e.g.
|
|
190
|
+
// "https://github.com/owner/repo.git" — no userinfo.
|
|
191
|
+
// SSH URLs are not supported here — the bot's auth
|
|
192
|
+
// model is bearer-token-over-https.
|
|
193
|
+
string url = 1;
|
|
194
|
+
|
|
195
|
+
// ref is the commit SHA (preferred) or branch / tag
|
|
196
|
+
// name to check out. Commit SHA is deterministic and
|
|
197
|
+
// matches what GitHub / GitLab webhooks already give
|
|
198
|
+
// us; branch/tag names are accepted for the local-dev
|
|
199
|
+
// case but workers should warn when they receive one
|
|
200
|
+
// (drift risk).
|
|
201
|
+
string ref = 2;
|
|
202
|
+
|
|
203
|
+
// auth_token is the bearer credential the worker uses
|
|
204
|
+
// as the password in the clone URL. Typically a
|
|
205
|
+
// platform installation access token (1h TTL).
|
|
206
|
+
string auth_token = 3;
|
|
207
|
+
|
|
208
|
+
// auth_user is the basic-auth username the worker
|
|
209
|
+
// pairs with auth_token to compose the clone URL:
|
|
210
|
+
//
|
|
211
|
+
// https://<auth_user>:<auth_token>@<host>/<path>
|
|
212
|
+
//
|
|
213
|
+
// Defaults to "x-access-token" when empty — that's
|
|
214
|
+
// GitHub's convention and works for the majority of
|
|
215
|
+
// first-party deployments. Other platforms set this
|
|
216
|
+
// explicitly:
|
|
217
|
+
//
|
|
218
|
+
// GitHub → "x-access-token" (or empty)
|
|
219
|
+
// GitLab → "oauth2"
|
|
220
|
+
// Bitbucket → "x-token-auth"
|
|
221
|
+
//
|
|
222
|
+
// Adding a new platform = the bot sets a new
|
|
223
|
+
// auth_user string; workers don't need to know which
|
|
224
|
+
// platform they're talking to.
|
|
225
|
+
string auth_user = 4;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
// HealthRequest is intentionally empty: the Health probe takes no parameters.
message HealthRequest {}
|
|
229
|
+
// HealthResponse is the reply to the Health liveness probe.
message HealthResponse {
|
|
230
|
+
// Version string of the responding worker. Presumably the same
// value as DescribeResponse.version — TODO confirm for workers
// other than ruby-worker.
string version = 1;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// ─────────────────────────────────────────────────────────
|
|
234
|
+
// Parse — the core AST RPC
|
|
235
|
+
// ─────────────────────────────────────────────────────────
|
|
236
|
+
|
|
237
|
+
message ParseRequest {
|
|
238
|
+
// Absolute path to the repo root. The worker resolves
|
|
239
|
+
// file paths relative to this root.
|
|
240
|
+
//
|
|
241
|
+
// Exactly one of repo_path / repo_source is populated
|
|
242
|
+
// per request. repo_path is set when the bot and
|
|
243
|
+
// worker share a filesystem (CLI / local sidecar /
|
|
244
|
+
// pod-with-shared-volume). repo_source is set when
|
|
245
|
+
// the worker advertises "self_fetch" and the bot is
|
|
246
|
+
// dialing across the network.
|
|
247
|
+
string repo_path = 1;
|
|
248
|
+
|
|
249
|
+
// Repo-relative paths to parse. Slash-normalized,
|
|
250
|
+
// forward-slash separator regardless of platform.
|
|
251
|
+
repeated string files = 2;
|
|
252
|
+
|
|
253
|
+
// Self-fetch source — populated when the bot wants
|
|
254
|
+
// the worker to clone the repo itself. See RepoSource
|
|
255
|
+
// for the capability gate and security expectations.
|
|
256
|
+
RepoSource repo_source = 3;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// ParseResponse is the reply to Parse. Per-file parse failures
// are reported inside each ParsedFile (parse_error), not as RPC
// errors.
message ParseResponse {
|
|
260
|
+
// Parsed view of the requested files. Correlate entries with
// the request via ParsedFile.path rather than by position.
repeated ParsedFile files = 1;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
// ParsedFile carries the minimal AST surface every
|
|
264
|
+
// structural detector consumes. Shape is universal across
|
|
265
|
+
// languages — implementations populate the fields their
|
|
266
|
+
// language meaningfully exposes and leave the rest empty.
|
|
267
|
+
message ParsedFile {
|
|
268
|
+
// Repo-relative path, slash-normalized. Echoed back from
|
|
269
|
+
// the request so the bot can correlate response files
|
|
270
|
+
// with request files without ordering assumptions.
|
|
271
|
+
string path = 1;
|
|
272
|
+
|
|
273
|
+
// The language tag the worker assigned to this file.
|
|
274
|
+
// Usually matches one of the languages from the worker's
|
|
275
|
+
// Describe response. Set even on parse errors.
|
|
276
|
+
string language = 2;
|
|
277
|
+
|
|
278
|
+
repeated ImportRef imports = 3;
|
|
279
|
+
repeated DeclRef decls = 4;
|
|
280
|
+
repeated FunctionRef functions = 5;
|
|
281
|
+
repeated ConcernEvidenceRef concerns = 7;
|
|
282
|
+
|
|
283
|
+
// Non-empty when the worker failed to parse this file
|
|
284
|
+
// cleanly. The other fields may still carry partial
|
|
285
|
+
// results — workers should return whatever the parser
|
|
286
|
+
// salvaged so detectors get USEFUL signal even on
|
|
287
|
+
// syntactically-broken files.
|
|
288
|
+
string parse_error = 6;
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
// ImportRef is one import / require / use declaration.
|
|
292
|
+
message ImportRef {
|
|
293
|
+
// Raw module identifier as written in source. The bot
|
|
294
|
+
// does NOT resolve this against the language's module
|
|
295
|
+
// system; that's the worker's job if it advertises
|
|
296
|
+
// resolve_symbols.
|
|
297
|
+
string path = 1;
|
|
298
|
+
|
|
299
|
+
// Local binding when the language allows renaming on
|
|
300
|
+
// import (Go: `import f "fmt"`, JS: `import { x as y }`,
|
|
301
|
+
// PHP: `use Foo\Bar as B`, Ruby: autoload). Empty when
|
|
302
|
+
// not applicable or not aliased.
|
|
303
|
+
string alias = 2;
|
|
304
|
+
|
|
305
|
+
// 1-based line number in the source file.
|
|
306
|
+
int32 line = 3;
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
// DeclRef is one top-level declaration. Kind is a lowercase
|
|
310
|
+
// string from the canonical set: function | method | type |
|
|
311
|
+
// interface | const | var | class | trait | enum | module |
|
|
312
|
+
// constant | component | hook. Workers MAY emit
|
|
313
|
+
// kinds not in this list; the bot's structural detectors
|
|
314
|
+
// switch on the canonical set and silently ignore unknowns
|
|
315
|
+
// (forward-compat).
|
|
316
|
+
message DeclRef {
|
|
317
|
+
string name = 1;
|
|
318
|
+
string kind = 2;
|
|
319
|
+
int32 line = 3;
|
|
320
|
+
int32 end_line = 4;
|
|
321
|
+
|
|
322
|
+
// Exported is whether this declaration is visible
|
|
323
|
+
// outside its compilation unit / module / namespace.
|
|
324
|
+
// Languages without formal export concepts (PHP, Ruby)
|
|
325
|
+
// set this to true for top-level decls.
|
|
326
|
+
bool exported = 5;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
// FunctionRef describes one function / method definition.
|
|
330
|
+
// Carries the data the structural detectors need for
|
|
331
|
+
// god-function, complexity, and the three DRY variants.
|
|
332
|
+
message FunctionRef {
|
|
333
|
+
string name = 1;
|
|
334
|
+
int32 start_line = 2;
|
|
335
|
+
int32 end_line = 3;
|
|
336
|
+
|
|
337
|
+
// Cyclomatic complexity estimate (McCabe — count of
|
|
338
|
+
// decision points + 1). Workers may compute it however
|
|
339
|
+
// they prefer; the bot uses the value as a coarse
|
|
340
|
+
// signal, not a precise measurement.
|
|
341
|
+
int32 complexity = 4;
|
|
342
|
+
|
|
343
|
+
bool is_method = 5;
|
|
344
|
+
bool is_exported = 6;
|
|
345
|
+
|
|
346
|
+
// Hash is the LANGUAGE-SPECIFIC fingerprint of the
|
|
347
|
+
// function body's AST shape. Two functions in the same
|
|
348
|
+
// language with the same hash share structure modulo
|
|
349
|
+
// identifier names + literal values. Empty when the
|
|
350
|
+
// body was too small to be a meaningful DRY signal.
|
|
351
|
+
//
|
|
352
|
+
// The hashing scheme is per-worker. Recommended: SHA-1
|
|
353
|
+
// of a token stream, truncated to 16 hex chars. The
|
|
354
|
+
// ts-worker reference implementation in
|
|
355
|
+
// workers/ts/src/parser.ts is the template for new
|
|
356
|
+
// workers.
|
|
357
|
+
string hash = 7;
|
|
358
|
+
|
|
359
|
+
// CanonicalHash is the LANGUAGE-NEUTRAL fingerprint
|
|
360
|
+
// produced by mapping the function body's AST onto a
|
|
361
|
+
// shared canonical token vocabulary. Two functions in
|
|
362
|
+
// DIFFERENT languages with the same canonical hash share
|
|
363
|
+
// a structural shape — the basis for cross-language
|
|
364
|
+
// duplicate detection.
|
|
365
|
+
//
|
|
366
|
+
// The canonical vocabulary is defined in
|
|
367
|
+
// core/pkg/structural/lang/canonical.go. New workers
|
|
368
|
+
// mirror that vocabulary; PR-able if a new construct
|
|
369
|
+
// needs adding.
|
|
370
|
+
string canonical_hash = 8;
|
|
371
|
+
|
|
372
|
+
repeated BlockRef blocks = 9;
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
// BlockRef is one nested statement block (if-body, else-
|
|
376
|
+
// body, for-body, switch-case body, try / catch / finally
|
|
377
|
+
// body). Each block carries both hash variants so within-
|
|
378
|
+
// language and cross-language block-duplicate detection
|
|
379
|
+
// consume the same data.
|
|
380
|
+
message BlockRef {
|
|
381
|
+
// Kind of the construct that owns this block:
|
|
382
|
+
// "if" | "else" | "elseif" | "for" | "case" |
|
|
383
|
+
// "try" | "catch" | "finally" | "rescue" | "block".
|
|
384
|
+
string kind = 1;
|
|
385
|
+
|
|
386
|
+
int32 start_line = 2;
|
|
387
|
+
int32 end_line = 3;
|
|
388
|
+
string hash = 4; // language-specific
|
|
389
|
+
string canonical_hash = 5; // language-neutral
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
// ConcernEvidenceRef is one categorized signal the worker
|
|
393
|
+
// extracted. The bot demuxes by category into the typed
|
|
394
|
+
// ConcernSet on lang.FileView.
|
|
395
|
+
//
|
|
396
|
+
// Canonical category values:
|
|
397
|
+
// "presentation" — UI rendering / templates / JSX
|
|
398
|
+
// "state" — in-memory state, hooks, signals
|
|
399
|
+
// "transport" — server-side request handlers
|
|
400
|
+
// "network" — outbound HTTP / RPC / message-broker
|
|
401
|
+
// "dataaccess" — persistent storage operations
|
|
402
|
+
// "io" — filesystem, OS-level IO
|
|
403
|
+
// "config" — env reads, flags, dotenv
|
|
404
|
+
// "business" — high-complexity decision logic
|
|
405
|
+
//
|
|
406
|
+
// New categories can be added by workers; the bot
|
|
407
|
+
// silently drops unknown values so older bots keep working.
|
|
408
|
+
message ConcernEvidenceRef {
|
|
409
|
+
string category = 1;
|
|
410
|
+
int32 line = 2;
|
|
411
|
+
string symbol = 3;
|
|
412
|
+
string note = 4;
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
// ─────────────────────────────────────────────────────────
|
|
416
|
+
// ResolveSymbols — optional capability
|
|
417
|
+
// ─────────────────────────────────────────────────────────
|
|
418
|
+
|
|
419
|
+
message ResolveRequest {
|
|
420
|
+
// Absolute repo root path. See ParseRequest.repo_path
|
|
421
|
+
// for the repo_path / repo_source contract.
|
|
422
|
+
string repo_path = 1;
|
|
423
|
+
|
|
424
|
+
// The unified diff being reviewed. The worker extracts
|
|
425
|
+
// referenced-but-undeclared identifiers from the diff
|
|
426
|
+
// and looks up their declarations.
|
|
427
|
+
string diff = 2;
|
|
428
|
+
|
|
429
|
+
// Repo-relative paths of changed files. Authoritative
|
|
430
|
+
// when present; the worker may also derive its own list
|
|
431
|
+
// from the diff.
|
|
432
|
+
repeated string changed_files = 3;
|
|
433
|
+
|
|
434
|
+
// Self-fetch source — set when the worker should clone
|
|
435
|
+
// the repo itself instead of reading from repo_path.
|
|
436
|
+
RepoSource repo_source = 4;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
// ResolveResponse is the reply to ResolveSymbols.
message ResolveResponse {
|
|
440
|
+
// Declarations found for identifiers referenced in the diff
// but defined elsewhere in the repo.
repeated SymbolDecl symbols = 1;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
// SymbolDecl is one resolved declaration returned by
// ResolveSymbols.
message SymbolDecl {
|
|
444
|
+
// Identifier the declaration belongs to.
string name = 1;
|
|
445
|
+
string file_path = 2; // repo-relative
|
|
446
|
+
// Line span of the declaration in file_path. Presumably 1-based
// like ImportRef.line — TODO confirm.
int32 start_line = 3;
|
|
447
|
+
int32 end_line = 4;
|
|
448
|
+
string text = 5; // full declaration source
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
// ─────────────────────────────────────────────────────────
|
|
452
|
+
// RunDiagnostics — optional capability
|
|
453
|
+
// ─────────────────────────────────────────────────────────
|
|
454
|
+
|
|
455
|
+
message DiagnosticsRequest {
|
|
456
|
+
// See ParseRequest.repo_path for the repo_path /
|
|
457
|
+
// repo_source contract.
|
|
458
|
+
string repo_path = 1;
|
|
459
|
+
|
|
460
|
+
// Only return diagnostics whose file is in this set.
|
|
461
|
+
// Empty = no filter (return everything tsc / PHPStan /
|
|
462
|
+
// Sorbet found).
|
|
463
|
+
repeated string filter_files = 2;
|
|
464
|
+
|
|
465
|
+
// Self-fetch source — set when the worker should clone
|
|
466
|
+
// the repo itself instead of reading from repo_path.
|
|
467
|
+
RepoSource repo_source = 3;
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
// DiagnosticsResponse is the reply to RunDiagnostics.
message DiagnosticsResponse {
|
|
471
|
+
// Type-checker findings. Empty when the project has no errors
// or the worker does not run a type-checker.
repeated Diagnostic diagnostics = 1;
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
// Diagnostic is one finding reported by the language's native
// type-checker.
message Diagnostic {
|
|
475
|
+
string file = 1; // repo-relative
|
|
476
|
+
// Position of the finding within file. Presumably 1-based like
// ImportRef.line — TODO confirm, including for col.
int32 line = 2;
|
|
477
|
+
int32 col = 3;
|
|
478
|
+
string code = 4; // e.g. "TS2345", "PHPStan.Error", "Sorbet:7008"
|
|
479
|
+
// Human-readable description of the finding.
string message = 5;
|
|
480
|
+
}
|