revund-ruby-worker 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +190 -0
- data/README.md +60 -0
- data/bin/revund-ruby-worker +17 -0
- data/lib/ruby_worker/fetcher.rb +147 -0
- data/lib/ruby_worker/parser.rb +663 -0
- data/lib/ruby_worker/server.rb +36 -0
- data/lib/ruby_worker/service.rb +114 -0
- data/lib/ruby_worker/version.rb +5 -0
- data/proto/worker/v1/worker.proto +480 -0
- metadata +117 -0
|
@@ -0,0 +1,663 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'parser/current'
|
|
5
|
+
|
|
6
|
+
# The generated protobuf classes — these come from
|
|
7
|
+
# scripts/gen-proto.sh (a follow-up). We reference them as
|
|
8
|
+
# constants lazily so the file loads cleanly even before
|
|
9
|
+
# the stubs exist.
|
|
10
|
+
|
|
11
|
+
module RubyWorker
|
|
12
|
+
# Parser walks each requested Ruby file via the
|
|
13
|
+
# whitequark/parser gem and produces the ParsedFile
|
|
14
|
+
# messages the Go-side structural detectors consume.
|
|
15
|
+
#
|
|
16
|
+
# Mirrors `workers/ts/src/parser.ts` and
|
|
17
|
+
# `workers/php/src/Parser.php` structurally — same six
|
|
18
|
+
# collectors, same hashing scheme, same block-extraction
|
|
19
|
+
# shape. Cross-language symmetry isn't accidental; it's how
|
|
20
|
+
# the canonical-hash detector clusters Ruby / TS / Go /
|
|
21
|
+
# PHP functions that share a structural shape.
|
|
22
|
+
#
|
|
23
|
+
# ## Hashing scheme
|
|
24
|
+
#
|
|
25
|
+
# Two hashes per function:
|
|
26
|
+
#
|
|
27
|
+
# * `hash` (language-specific): captures Ruby-flavored AST
|
|
28
|
+
# node types including operator method names on `:send`
|
|
29
|
+
# nodes. Two Ruby methods with the same hash share AST
|
|
30
|
+
# shape modulo identifier names and literal values.
|
|
31
|
+
# * `canonical_hash` (cross-language): same scheme using
|
|
32
|
+
# the universal token vocabulary defined in
|
|
33
|
+
# core/pkg/structural/lang/canonical.go.
|
|
34
|
+
#
|
|
35
|
+
# Both hashes use SHA-1 truncated to 16 hex chars. Trivial
|
|
36
|
+
# bodies (≤2 nodes) short-circuit to "".
|
|
37
|
+
#
|
|
38
|
+
# ## Concerns
|
|
39
|
+
#
|
|
40
|
+
# Per-file concerns are categorized into ConcernEvidenceRef
|
|
41
|
+
# entries tagged with one of the eight canonical
|
|
42
|
+
# categories. The classifier looks at:
|
|
43
|
+
#
|
|
44
|
+
# * `:send` nodes whose method is a known state / network
|
|
45
|
+
# / io / config / dataaccess identifier
|
|
46
|
+
# * `[]` accesses on session / cookies / ENV
|
|
47
|
+
# * Rails.cache, Rails.application.config
|
|
48
|
+
# * High-complexity methods → business
|
|
49
|
+
#
|
|
50
|
+
# The taxonomy aligns with the Rails framework profile in
|
|
51
|
+
# `core/pkg/structural/framework/rails.go`.
|
|
52
|
+
#
|
|
53
|
+
# ## Error tolerance
|
|
54
|
+
#
|
|
55
|
+
# Syntactically broken Ruby still yields a partial
|
|
56
|
+
# ParsedFile with whatever the parser salvaged. The
|
|
57
|
+
# `parse_error` field carries the SyntaxError's message
|
|
58
|
+
# verbatim. The parser gem itself has best-in-class error
|
|
59
|
+
# recovery (it's what RuboCop relies on for partial parses).
|
|
60
|
+
class Parser
|
|
61
|
+
BUSINESS_COMPLEXITY = 8
|
|
62
|
+
MIN_BLOCK_STMTS = 3
|
|
63
|
+
HASH_HEX_LEN = 16
|
|
64
|
+
|
|
65
|
+
# Builds the whitequark parser instance that parse_one reuses (it calls
# `reset` on it before every parse).
def initialize
  @ruby_parser = ::Parser::CurrentRuby.new
  engine = @ruby_parser.diagnostics
  # Warning-level diagnostics are ignored and error-level diagnostics are
  # not promoted to fatal.
  engine.ignore_warnings = true
  engine.all_errors_are_fatal = false
end
|
|
72
|
+
|
|
73
|
+
# @param repo_path [String] absolute repo root path
|
|
74
|
+
# @param rel_paths [Array<String>] repo-relative file paths
|
|
75
|
+
# @return [Array<Object>] ParsedFile proto messages
|
|
76
|
+
def parse_files(repo_path, rel_paths)
  # One result per requested path, in request order.
  rel_paths.each_with_object([]) do |rel_path, parsed|
    parsed << parse_one(repo_path, rel_path)
  end
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
# Resolves the generated ParsedFile message class on first use and caches
# it, so loading this file does not require the stubs to exist.
def parsed_file_class
  return @parsed_file_class if @parsed_file_class

  @parsed_file_class = ::Revund::Worker::V1::ParsedFile
end
|
|
85
|
+
|
|
86
|
+
# Parses a single repo-relative file into a ParsedFile message.
#
# Every failure mode is reported through `parse_error` on the returned
# message instead of raising, so one bad path cannot abort the batch in
# parse_files.
#
# @param repo_path [String] repo root path
# @param rel [String] repo-relative file path (taken from the request)
# @return [Object] ParsedFile message; only path/language/parse_error are
#   set when the file could not be read or parsed
def parse_one(repo_path, rel)
  abs = File.join(repo_path, rel)
  pf = parsed_file_class.new(path: rel, language: 'ruby')

  # `rel` is caller-supplied: refuse anything that resolves outside the
  # repo root (e.g. "../../etc/passwd"). Symlinks are not resolved here.
  root = File.expand_path(repo_path)
  resolved = File.expand_path(abs)
  root_prefix = root.chomp(File::SEPARATOR) + File::SEPARATOR
  unless resolved == root || resolved.start_with?(root_prefix)
    pf.parse_error = "path escapes repository root: #{rel}"
    return pf
  end

  # File.readable? alone is also true for directories.
  unless File.file?(abs) && File.readable?(abs)
    pf.parse_error = "file not readable: #{abs}"
    return pf
  end

  begin
    source = File.read(abs)
  rescue SystemCallError
    # The file vanished or changed permissions after the check above.
    pf.parse_error = "file not readable: #{abs}"
    return pf
  end

  buffer = ::Parser::Source::Buffer.new(abs)
  begin
    # Buffer#source= raises when the content is not valid in its encoding
    # (or the magic encoding comment is unknown).
    buffer.source = source
  rescue EncodingError, ArgumentError => e
    pf.parse_error = e.message
    return pf
  end

  @ruby_parser.reset
  ast = nil
  begin
    ast = @ruby_parser.parse(buffer)
  rescue ::Parser::SyntaxError => e
    pf.parse_error = e.message
    return pf
  end

  # whitequark/parser returns nil for empty / whitespace-only files.
  # Not an error — return the empty ParsedFile.
  return pf if ast.nil?

  # NOTE(review): if ParsedFile is a google-protobuf message, assigning a
  # plain Array to a repeated field may raise a type error — confirm
  # against the generated stubs.
  pf.imports = collect_imports(ast)
  pf.decls = collect_decls(ast)
  pf.functions = collect_functions(ast)
  pf.concerns = collect_concerns(ast)
  pf
end
|
|
118
|
+
|
|
119
|
+
# ────────────────────────────────────────────────
|
|
120
|
+
# Imports: require / require_relative / autoload
|
|
121
|
+
# ────────────────────────────────────────────────
|
|
122
|
+
|
|
123
|
+
# Resolves the generated ImportRef message class on first use and caches it.
def import_ref_class
  return @import_ref_class if @import_ref_class

  @import_ref_class = ::Revund::Worker::V1::ImportRef
end
|
|
126
|
+
|
|
127
|
+
# Collects an ImportRef for every receiver-less `require`,
# `require_relative` and `autoload` call found anywhere in the tree.
# Only string-literal paths are recorded; dynamic paths are skipped.
#
# @param ast [Parser::AST::Node] root of the parsed file
# @return [Array<Object>] ImportRef messages in document order
def collect_imports(ast)
  refs = []
  walk(ast) do |node|
    next unless node.is_a?(::Parser::AST::Node)
    next unless node.type == :send

    receiver, method, *args = node.children
    next unless receiver.nil? # calls without an explicit receiver only

    case method
    when :require, :require_relative
      target = args.first
      next unless target && target.type == :str

      label = ''
    when :autoload
      # autoload(:Const, "path/to/file") — the constant becomes the alias.
      symbol_arg, target = args
      next unless symbol_arg && target && target.type == :str

      label = symbol_arg.type == :sym ? symbol_arg.children.first.to_s : ''
    else
      next
    end

    refs << import_ref_class.new(
      path: target.children.first.to_s,
      alias: label,
      line: node.loc.line,
    )
  end
  refs
end
|
|
157
|
+
|
|
158
|
+
# ────────────────────────────────────────────────
|
|
159
|
+
# Decls: class / module / def / casgn (constants)
|
|
160
|
+
# ────────────────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
# Resolves the generated DeclRef message class on first use and caches it.
def decl_ref_class
  return @decl_ref_class if @decl_ref_class

  @decl_ref_class = ::Revund::Worker::V1::DeclRef
end
|
|
165
|
+
|
|
166
|
+
# Collects DeclRef entries for top-level statements only. A multi-statement
# file has a :begin root whose children are the statements; a
# single-statement file has that statement as the root.
#
# @param ast [Parser::AST::Node] root of the parsed file
# @return [Array<Object>] DeclRef messages in document order
def collect_decls(ast)
  candidates = ast.type == :begin ? ast.children : [ast]
  candidates
    .select { |child| child.is_a?(::Parser::AST::Node) }
    .map { |child| decl_from_node(child) }
    .compact
end
|
|
179
|
+
|
|
180
|
+
# Maps one top-level node to a DeclRef, or nil when the node is not a
# class / module / def / constant assignment. Class and module nodes whose
# name const_name cannot resolve are skipped. Every declaration is marked
# exported.
#
# @param node [Parser::AST::Node]
# @return [Object, nil] DeclRef message or nil
def decl_from_node(node)
  kind, name =
    case node.type
    when :class  then ['class', const_name(node.children[0])]
    when :module then ['module', const_name(node.children[0])]
    when :def    then ['method', node.children[0].to_s]
    when :casgn  then ['constant', node.children[1].to_s]
    end
  return nil if kind.nil?
  return nil if name.empty? && (kind == 'class' || kind == 'module')

  decl_ref_class.new(
    name: name,
    kind: kind,
    line: node.loc.line,
    end_line: end_line_of(node),
    exported: true,
  )
end
|
|
221
|
+
|
|
222
|
+
# Returns the last segment of a :const node's name, or "" for anything
# that is not a :const node. The namespace child is ignored.
def const_name(node)
  is_const = node.is_a?(::Parser::AST::Node) && node.type == :const
  is_const ? node.children[1].to_s : ''
end
|
|
227
|
+
|
|
228
|
+
# Last source line covered by the node. Falls back to the node's start
# line when its location map has no expression range.
def end_line_of(node)
  location = node.loc
  range = location.expression if location.respond_to?(:expression)
  range ? range.last_line : location.line
end
|
|
233
|
+
|
|
234
|
+
# ────────────────────────────────────────────────
|
|
235
|
+
# Functions: every :def / :defs anywhere in the file
|
|
236
|
+
# ────────────────────────────────────────────────
|
|
237
|
+
|
|
238
|
+
# Resolves the generated FunctionRef message class on first use and caches it.
def function_ref_class
  return @function_ref_class if @function_ref_class

  @function_ref_class = ::Revund::Worker::V1::FunctionRef
end
|
|
241
|
+
|
|
242
|
+
# Builds a FunctionRef for every :def and :defs node in the tree, nested
# ones included. All of them are reported as exported methods.
#
# @param ast [Parser::AST::Node] root of the parsed file
# @return [Array<Object>] FunctionRef messages in document order
def collect_functions(ast)
  # Index of the method-name child: (def name ...) vs (defs recv name ...).
  name_index = { def: 0, defs: 1 }
  refs = []
  walk(ast) do |node|
    next unless node.is_a?(::Parser::AST::Node)

    idx = name_index[node.type]
    next if idx.nil?

    refs << build_function_ref(node.children[idx].to_s, node, is_method: true, is_exported: true)
  end
  refs
end
|
|
256
|
+
|
|
257
|
+
# Assembles the FunctionRef message for one :def / :defs node: its line
# span, complexity score, both body hashes and the extracted sub-blocks.
#
# @param name [String] method name
# @param node [Parser::AST::Node] the :def or :defs node
# @param is_method [Boolean]
# @param is_exported [Boolean]
# @return [Object] FunctionRef message
def build_function_ref(name, node, is_method:, is_exported:)
  attrs = {
    name: name,
    start_line: node.loc.line,
    end_line: end_line_of(node),
    complexity: cyclomatic_complexity(node),
    is_method: is_method,
    is_exported: is_exported,
    hash: hash_function_body(node),
    canonical_hash: canonical_hash_body(node),
    blocks: extract_blocks(node),
  }
  function_ref_class.new(**attrs)
end
|
|
270
|
+
|
|
271
|
+
# Cyclomatic complexity (McCabe): start at 1, add 1 per
|
|
272
|
+
# decision point.
|
|
273
|
+
#
|
|
274
|
+
# Decision points in Ruby:
|
|
275
|
+
# - :if (the else arm doesn't add — it's the negation)
|
|
276
|
+
# - :while, :until, :for
|
|
277
|
+
# - :when (each case arm)
|
|
278
|
+
# - :resbody (each rescue clause)
|
|
279
|
+
#
|
|
280
|
+
# We don't count short-circuit && / || or the ternary
|
|
281
|
+
# to match the Go + TS + PHP counterparts.
|
|
282
|
+
def cyclomatic_complexity(node)
  score = 1
  walk(node) do |n|
    next unless n.is_a?(::Parser::AST::Node)

    case n.type
    when :if
      # The parser gem represents `cond ? a : b` as an :if node as well.
      # Its location map exposes `question` (the `?` token), which a
      # keyword if/unless map does not, so use that to leave ternaries
      # uncounted as the rules above require.
      score += 1 unless n.loc.respond_to?(:question)
    when :while, :until, :for, :when, :resbody
      score += 1
    end
  end
  score
end
|
|
295
|
+
|
|
296
|
+
# ────────────────────────────────────────────────
|
|
297
|
+
# Hashing
|
|
298
|
+
# ────────────────────────────────────────────────
|
|
299
|
+
|
|
300
|
+
# Language-specific structural hash of a method body: SHA-1 over the
# ';'-joined Ruby token stream, cut to HASH_HEX_LEN hex chars. Bodies of
# two nodes or fewer hash to "".
def hash_function_body(node)
  stream = []
  visited = walk_for_hash(function_body(node), stream, canonical: false)
  return '' unless visited > 2

  Digest::SHA1.hexdigest(stream.join(';'))[0, HASH_HEX_LEN]
end
|
|
307
|
+
|
|
308
|
+
# Cross-language structural hash of a method body: SHA-1 over the
# ';'-joined canonical token stream, cut to HASH_HEX_LEN hex chars. Bodies
# of two nodes or fewer hash to "".
def canonical_hash_body(node)
  stream = []
  visited = walk_for_hash(function_body(node), stream, canonical: true)
  return '' unless visited > 2

  Digest::SHA1.hexdigest(stream.join(';'))[0, HASH_HEX_LEN]
end
|
|
315
|
+
|
|
316
|
+
# Returns the function body node.
|
|
317
|
+
# :def → (name, args, body) — body is children[2]
|
|
318
|
+
# :defs → (receiver, name, args, body) — body is children[3]
|
|
319
|
+
def function_body(node)
  # Position of the body child for each definition type; any other node
  # is treated as being its own body.
  body_index = { def: 2, defs: 3 }[node.type]
  body_index ? node.children[body_index] : node
end
|
|
326
|
+
|
|
327
|
+
# Walks a node (or array of nodes) producing the hash
|
|
328
|
+
# token stream. Returns the total node count so the
|
|
329
|
+
# caller can short-circuit trivial bodies.
|
|
330
|
+
def walk_for_hash(node, tokens, canonical:)
  # Identifier / literal node types: tokenized but never descended into.
  leaf_types = %i[lvar ivar cvar gvar arg const sym str int float true false nil]
  # Nodes are taken from the front and children appended at the back, so
  # tokens are emitted level by level (breadth-first).
  queue = node.is_a?(Array) ? node.dup : [node]
  visited = 0

  until queue.empty?
    current = queue.shift
    next unless current.is_a?(::Parser::AST::Node)

    visited += 1
    tokens << (canonical ? canonical_token(current) : ruby_token(current))
    next if leaf_types.include?(current.type)

    queue.concat(current.children.select { |child| child.is_a?(::Parser::AST::Node) })
  end

  visited
end
|
|
354
|
+
|
|
355
|
+
# Language-specific hash token for one node: identifiers collapse to "I",
# literals to an "L:*" class, sends keep their method name, and every
# other node contributes its type name.
def ruby_token(node)
  kind = node.type
  return 'I' if %i[lvar ivar cvar gvar arg const].include?(kind)

  literal = {
    :str => 'L:STR',
    :int => 'L:NUM',
    :float => 'L:NUM',
    :true => 'L:BOOL',
    :false => 'L:BOOL',
    :nil => 'L:NIL',
    :sym => 'L:SYM',
  }[kind]
  return literal if literal

  if kind == :send
    selector = node.children[1]
    return "SEND:#{selector}" if selector
  end

  kind.to_s
end
|
|
376
|
+
|
|
377
|
+
# Canonical token vocabulary — mirrors
|
|
378
|
+
# core/pkg/structural/lang/canonical.go.
|
|
379
|
+
def canonical_token(node)
  case node.type
  when :if
    'IF'
  when :while, :until, :for
    'FOR'
  when :return
    'RETURN'
  when :lvasgn, :ivasgn, :cvasgn, :gvasgn, :casgn, :masgn, :op_asgn
    'ASSIGN'
  when :case
    'SWITCH'
  when :when
    'IF' # case arms are conditional branches at the canonical level
  when :break
    'BREAK'
  when :next
    'CONTINUE'
  when :begin, :kwbegin
    # :begin is an implicit statement list / parenthesized expression;
    # an explicit `begin ... end` is parsed as :kwbegin. Either one is a
    # try construct when it directly wraps a :rescue node.
    has_rescue = node.children.any? { |c| c.is_a?(::Parser::AST::Node) && c.type == :rescue }
    has_rescue ? 'TRY' : 'BLOCK'
  when :rescue, :resbody
    'TRY'
  when :ensure
    'DEFER'
  when :send, :csend
    # Ruby operators are method calls under the hood (`1 + 2` is
    # `1.+(2)`), so classify by method name to keep `+` and `-` distinct.
    method = node.children[1].to_s
    case method
    when '+', '-', '*', '/', '%', '**', '<<', '>>', '&', '|', '^',
         '==', '!=', '===', '<', '<=', '>', '>=', '=~', '<=>'
      "BIN:#{method}"
    when '!', '-@', '+@', '~'
      "UN:#{method}"
    else
      'CALL'
    end
  when :block
    'CALL' # `foo { ... }` is a call with a block
  when :and
    'BIN:&&'
  when :or
    'BIN:||'
  when :not
    'UN:!'
  when :index
    'INDEX'
  when :const, :lvar, :ivar, :cvar, :gvar, :arg
    'ID'
  when :str
    'LIT:STR'
  when :int, :float
    'LIT:NUM'
  when :true, :false
    'LIT:BOOL'
  when :nil
    'LIT:NIL'
  when :sym
    'LIT:SYM'
  else
    # Everything without a dedicated token, including nested
    # class / module / def / defs / sclass definitions.
    'NODE'
  end
end
|
|
447
|
+
|
|
448
|
+
# ────────────────────────────────────────────────
|
|
449
|
+
# Block extraction
|
|
450
|
+
# ────────────────────────────────────────────────
|
|
451
|
+
|
|
452
|
+
# Resolves the generated BlockRef message class on first use and caches it.
def block_ref_class
  return @block_ref_class if @block_ref_class

  @block_ref_class = ::Revund::Worker::V1::BlockRef
end
|
|
455
|
+
|
|
456
|
+
# Collects BlockRef entries for the branch / loop / rescue bodies inside a
# method. Each candidate body goes through add_block, which drops the
# ones that are too small to be worth hashing.
#
# @param fn_node [Parser::AST::Node] a :def or :defs node
# @return [Array<Object>] BlockRef messages in document order
def extract_blocks(fn_node)
  blocks = []
  body = function_body(fn_node)
  return blocks if body.nil?

  walk(body) do |node|
    next unless node.is_a?(::Parser::AST::Node)

    # [body node, kind label] pairs for this construct.
    candidates =
      case node.type
      when :if             then [[node.children[1], 'if'], [node.children[2], 'else']]
      when :while, :until  then [[node.children[1], 'for']]
      when :for            then [[node.children[2], 'for']]
      when :when           then [[node.children.last, 'case']]
      when :resbody        then [[node.children[2], 'rescue']]
      else []
      end

    candidates.each do |branch, kind|
      add_block(blocks, branch, kind) if branch
    end
  end
  blocks
end
|
|
485
|
+
|
|
486
|
+
# Appends a BlockRef for body_node to out, unless the body has fewer than
# MIN_BLOCK_STMTS statements or both of its hashes are empty.
#
# @param out [Array] accumulator, mutated in place
# @param body_node [Parser::AST::Node] the block body
# @param kind [String] block label ("if", "else", "for", "case", "rescue")
def add_block(out, body_node, kind)
  return if body_statements(body_node).size < MIN_BLOCK_STMTS

  digest = hash_node(body_node)
  canonical_digest = canonical_hash_node(body_node)
  return if digest.empty? && canonical_digest.empty?

  attrs = {
    kind: kind,
    start_line: body_node.loc.line,
    end_line: end_line_of(body_node),
    hash: digest,
    canonical_hash: canonical_digest,
  }
  out << block_ref_class.new(**attrs)
end
|
|
502
|
+
|
|
503
|
+
# Statement list of a body: the children of a :begin wrapper, the node
# itself for a single statement, or [] when there is no node.
def body_statements(body_node)
  return [] unless body_node.is_a?(::Parser::AST::Node)

  body_node.type == :begin ? body_node.children : [body_node]
end
|
|
508
|
+
|
|
509
|
+
# Language-specific structural hash of an arbitrary node: SHA-1 over the
# ';'-joined Ruby token stream, cut to HASH_HEX_LEN hex chars. Subtrees of
# two nodes or fewer hash to "".
def hash_node(node)
  stream = []
  visited = walk_for_hash(node, stream, canonical: false)
  return '' unless visited > 2

  Digest::SHA1.hexdigest(stream.join(';'))[0, HASH_HEX_LEN]
end
|
|
515
|
+
|
|
516
|
+
# Cross-language structural hash of an arbitrary node: SHA-1 over the
# ';'-joined canonical token stream, cut to HASH_HEX_LEN hex chars.
# Subtrees of two nodes or fewer hash to "".
def canonical_hash_node(node)
  stream = []
  visited = walk_for_hash(node, stream, canonical: true)
  return '' unless visited > 2

  Digest::SHA1.hexdigest(stream.join(';'))[0, HASH_HEX_LEN]
end
|
|
522
|
+
|
|
523
|
+
# ────────────────────────────────────────────────
|
|
524
|
+
# Concerns
|
|
525
|
+
# ────────────────────────────────────────────────
|
|
526
|
+
|
|
527
|
+
# Resolves the generated ConcernEvidenceRef message class on first use and
# caches it.
def concern_ref_class
  return @concern_ref_class if @concern_ref_class

  @concern_ref_class = ::Revund::Worker::V1::ConcernEvidenceRef
end
|
|
530
|
+
|
|
531
|
+
# Gathers ConcernEvidenceRef entries for a file in two passes: first the
# call / index / global-variable evidence in document order, then one
# 'business' entry per method whose complexity reaches BUSINESS_COMPLEXITY.
#
# @param ast [Parser::AST::Node] root of the parsed file
# @return [Array<Object>] ConcernEvidenceRef messages
def collect_concerns(ast)
  found = []

  # Pass 1: per-node evidence.
  walk(ast) do |node|
    next unless node.is_a?(::Parser::AST::Node)

    kind = node.type
    if kind == :send || kind == :csend
      classify_send(node, found)
    elsif kind == :gvar
      found << make_concern('state', node.loc.line, node.children[0].to_s, 'global variable')
    elsif kind == :index
      # NOTE(review): the parser gem appears to emit :index nodes only
      # when the builder's emit_index option is enabled; nothing in this
      # file enables it — confirm this branch is reachable.
      target = node.children[0]
      next unless target.is_a?(::Parser::AST::Node)

      label = receiver_name(target)
      if label == 'ENV'
        found << make_concern('config', node.loc.line, 'ENV[]')
      elsif %w[session cookies].include?(label)
        found << make_concern('state', node.loc.line, "#{label}[]")
      end
    end
  end

  # Pass 2: complex methods count as business logic.
  walk(ast) do |node|
    next unless node.is_a?(::Parser::AST::Node)
    next unless node.type == :def || node.type == :defs
    next if cyclomatic_complexity(node) < BUSINESS_COMPLEXITY

    label = node.children[node.type == :def ? 0 : 1].to_s
    found << make_concern('business', node.loc.line, label, 'complex method')
  end

  found
end
|
|
566
|
+
|
|
567
|
+
# Classifies one :send / :csend node and appends at most one concern to
# out. Rules are tried in order and the first match wins:
#
#   1. Rails.cache                                   → state
#   2. Rails.application.config                      → config
#   3. session[...] / cookies[...] / ENV[...]        → state / config
#   4. Net::HTTP / Faraday / HTTParty / RestClient   → network
#   5. query-style method on a constant receiver     → dataaccess
#   6. File / IO / Dir                               → io
#   7. any other ENV call                            → config
#
# @param node [Parser::AST::Node] the call node
# @param out [Array] accumulator, mutated in place
# @return [void]
def classify_send(node, out)
  receiver, method = node.children[0], node.children[1]
  line = node.loc.line

  recv_name = receiver_name(receiver)
  method_s = method.to_s

  # Rails.cache — state.
  if recv_name == 'Rails' && method_s == 'cache'
    out << make_concern('state', line, 'Rails.cache')
    return
  end

  # Rails.application.config — config. The receiver here is the inner
  # `Rails.application` send, so receiver_name reports "application".
  if recv_name == 'application' && method_s == 'config' &&
     receiver.is_a?(::Parser::AST::Node) &&
     receiver_name(receiver.children[0]) == 'Rails'
    out << make_concern('config', line, 'Rails.application.config')
    return
  end

  # Element access. Unless the builder is configured to emit :index
  # nodes, `session[:x]` is a plain send of :[] / :[]=, so it has to be
  # recognized here. Symbols match the ones used for :index nodes.
  if ['[]', '[]='].include?(method_s)
    if %w[session cookies].include?(recv_name)
      out << make_concern('state', line, "#{recv_name}[]")
      return
    elsif recv_name == 'ENV'
      out << make_concern('config', line, 'ENV[]')
      return
    end
  end

  # Network: Net::HTTP, Faraday, HTTParty, RestClient.
  if %w[Net::HTTP Faraday HTTParty RestClient].include?(recv_name)
    if %w[get post put delete patch head options new start request].include?(method_s)
      out << make_concern('network', line, "#{recv_name}.#{method_s}")
      return
    end
  end

  # ActiveRecord-style query methods on a constant receiver (presumably a
  # model class; the receiver's ancestry cannot be checked statically).
  if receiver.is_a?(::Parser::AST::Node) && receiver.type == :const
    query_methods = %w[where find find_by first last all pluck order limit
                       create update destroy save count exists?]
    if query_methods.include?(method_s)
      out << make_concern('dataaccess', line, "#{recv_name}.#{method_s}")
      return
    end
  end

  # IO: File / IO / Dir.
  if %w[File IO Dir].include?(recv_name)
    out << make_concern('io', line, "#{recv_name}.#{method_s}")
    return
  end

  # Config: ENV.fetch, ENV.key? etc.
  if recv_name == 'ENV'
    out << make_concern('config', line, "ENV.#{method_s}")
    return
  end

  nil
end
|
|
611
|
+
|
|
612
|
+
# Returns a human-readable name for a receiver node, or "".
|
|
613
|
+
def receiver_name(node)
  return '' unless node.is_a?(::Parser::AST::Node)

  # For :const the children are (scope, name); for :send they are
  # (receiver, method, ...), so the second child is the label either way.
  scope, ident = node.children
  case node.type
  when :send
    ident.to_s
  when :const
    # Qualify with the enclosing constant only when the scope is itself a
    # constant (Net::HTTP); other scopes are dropped.
    nested = scope.is_a?(::Parser::AST::Node) && scope.type == :const
    nested ? "#{receiver_name(scope)}::#{ident}" : ident.to_s
  else
    ''
  end
end
|
|
632
|
+
|
|
633
|
+
# Builds one ConcernEvidenceRef message.
#
# @param category [String] concern category tag
# @param line [Integer] source line of the evidence
# @param symbol [String] the construct that triggered the concern
# @param note [String] optional free-text note
# @return [Object] ConcernEvidenceRef message
def make_concern(category, line, symbol, note = '')
  attrs = { category: category, line: line, symbol: symbol, note: note }
  concern_ref_class.new(**attrs)
end
|
|
641
|
+
|
|
642
|
+
# ────────────────────────────────────────────────
|
|
643
|
+
# Generic tree walker
|
|
644
|
+
# ────────────────────────────────────────────────
|
|
645
|
+
|
|
646
|
+
# Iterative pre-order traversal. Yields every
|
|
647
|
+
# Parser::AST::Node descendant (including the root).
|
|
648
|
+
def walk(root)
  node_class = ::Parser::AST::Node
  pending = [root]
  until pending.empty?
    current = pending.pop
    next unless current.is_a?(node_class)

    yield current
    # Only node children can ever be yielded; push them reversed so the
    # leftmost child is popped (and visited) first.
    node_children = current.children.select { |child| child.is_a?(node_class) }
    pending.concat(node_children.reverse)
  end
end
|
|
662
|
+
end
|
|
663
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'grpc'
|
|
4
|
+
require_relative 'service'
|
|
5
|
+
|
|
6
|
+
module RubyWorker
|
|
7
|
+
# Server is the gRPC entry. Implements the universal
|
|
8
|
+
# `revund.worker.v1.Worker` contract — same shape as ts-worker
|
|
9
|
+
# and php-worker. Bind, advertise readiness on stdout, register
|
|
10
|
+
# the service, serve.
|
|
11
|
+
class Server
  VERSION = '0.1.0'

  # @param port [Integer, String] TCP port to listen on
  def initialize(port)
    @port = port
  end

  # Binds the port, registers the worker service, prints a readiness line
  # on stdout and then blocks until the server is stopped by SIGTERM or
  # SIGINT.
  #
  # NOTE(review): the listener is plaintext and bound to every interface
  # (0.0.0.0); confirm that is intended for a worker spawned as a sidecar.
  #
  # @return [void]
  def run
    server = GRPC::RpcServer.new
    # add_http2_port returns the port that was actually bound, which is
    # what the parent needs to see when an ephemeral port (0) was requested.
    bound_port = server.add_http2_port("0.0.0.0:#{@port}", :this_port_is_insecure)
    server.handle(RubyWorker::Service.new)

    # Liveness ping for parent processes that spawn this worker as a
    # sidecar.
    $stdout.puts("ready: 0.0.0.0:#{bound_port}")
    $stdout.flush

    # Graceful shutdown on SIGTERM / SIGINT. RpcServer#stop takes a mutex,
    # which Ruby forbids inside a signal handler ("can't be called from
    # trap context"), so let gRPC install handlers that stop the server
    # from a regular thread instead of calling stop from `trap`.
    server.run_till_terminated_or_interrupted(%w[TERM INT])
  end
end
|
|
36
|
+
end
|