skylight 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/skylight/version.rb +1 -1
- data/lib/skylight/worker/server.rb +1 -1
- data/lib/sql_lexer.rb +1 -0
- data/lib/sql_lexer/lexer.rb +4 -50
- data/lib/sql_lexer/version.rb +1 -1
- data/too-many-sockets.md +62 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ba19fdc679a43bb87c5cf95b843b05ce58878cb
|
4
|
+
data.tar.gz: 7cdf7d06b0eba50a07cbea357929bcd8b8231f73
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c59d80d86beb6ff52709bb48a3182373ae2557f901419e4603dcaac64072f6144b3c6735fc2aef5fae3fd487647f230dd6c582f67b54052961e080aba307f120
|
7
|
+
data.tar.gz: 260f99fcf7b8a7b7074538e2083df7b334bbd4e1afd4b1e6874dbbaafc2616567127507e00c6bd7724c8a55c32b9cf479759d6eac0961fda6a8bbe10b02d5914
|
data/lib/skylight/version.rb
CHANGED
data/lib/sql_lexer.rb
CHANGED
data/lib/sql_lexer/lexer.rb
CHANGED
@@ -29,8 +29,6 @@ module SqlLexer
|
|
29
29
|
StartString = %Q<'>
|
30
30
|
StartDigit = %q<[\p{Digit}\.]>
|
31
31
|
|
32
|
-
StartSelect = %Q<SELECT(?=(?:[#{WS}]|#{OpPart}))>
|
33
|
-
|
34
32
|
# Binds that are also IDs do not need to be included here, since AfterOp (which uses StartBind)
|
35
33
|
# also checks for StartAnyId
|
36
34
|
StartBind = %Q<#{StartString}|#{StartDigit}|#{SpecialOps}>
|
@@ -87,9 +85,7 @@ module SqlLexer
|
|
87
85
|
TkArray = %r<#{ArrayOp}>iu
|
88
86
|
TkArrayIndex = %r<#{ArrayIndexOp}>iu
|
89
87
|
TkSpecialOp = %r<#{SpecialOps}>iu
|
90
|
-
TkStartSelect = %r<#{StartSelect}>iu
|
91
|
-
TkStartSubquery = %r<\(#{OptWS}#{StartSelect}>iu
|
92
|
-
TkCloseParen = %r<#{OptWS}\)>u
|
88
|
+
TkStartSelect = %r<SELECT(?=(?:[#{WS}]|#{OpPart}))>iu
|
93
89
|
|
94
90
|
STATE_HANDLERS = {
|
95
91
|
begin: :process_begin,
|
@@ -101,7 +97,6 @@ module SqlLexer
|
|
101
97
|
table_name: :process_table_name,
|
102
98
|
end: :process_end,
|
103
99
|
special: :process_special,
|
104
|
-
subquery: :process_subquery,
|
105
100
|
in: :process_in,
|
106
101
|
array: :process_array
|
107
102
|
}
|
@@ -356,12 +351,8 @@ module SqlLexer
|
|
356
351
|
def process_special
|
357
352
|
if @scanner.skip(TkIn)
|
358
353
|
@scanner.skip(TkOptWS)
|
359
|
-
|
360
|
-
|
361
|
-
else
|
362
|
-
@scanner.skip(/\(/u)
|
363
|
-
@state = :in
|
364
|
-
end
|
354
|
+
@scanner.skip(/\(/u)
|
355
|
+
@state = :in
|
365
356
|
elsif @scanner.skip(TkArray)
|
366
357
|
@scanner.skip(/\[/u)
|
367
358
|
@state = :array
|
@@ -371,48 +362,11 @@ module SqlLexer
|
|
371
362
|
else
|
372
363
|
@state = :end
|
373
364
|
end
|
374
|
-
elsif @scanner.skip(TkStartSubquery)
|
375
|
-
@state = :subquery
|
376
365
|
elsif @scanner.skip(TkArrayIndex)
|
377
366
|
@state = :tokens
|
378
367
|
end
|
379
368
|
end
|
380
369
|
|
381
|
-
def process_subquery
|
382
|
-
nest = 1
|
383
|
-
iterations = 0
|
384
|
-
|
385
|
-
while nest > 0
|
386
|
-
iterations += 1
|
387
|
-
|
388
|
-
if iterations > 10_000
|
389
|
-
raise "The SQL '#{@scanner.string}' could not be parsed because of too many iterations in subquery"
|
390
|
-
end
|
391
|
-
|
392
|
-
if @debug
|
393
|
-
p @state
|
394
|
-
p @scanner
|
395
|
-
p nest
|
396
|
-
p @scanner.peek(1)
|
397
|
-
end
|
398
|
-
|
399
|
-
if @scanner.skip(TkStartSubquery)
|
400
|
-
nest += 1
|
401
|
-
@state = :tokens
|
402
|
-
elsif @scanner.skip(TkCloseParen)
|
403
|
-
nest -= 1
|
404
|
-
break if nest.zero?
|
405
|
-
@state = :tokens
|
406
|
-
elsif @state == :subquery
|
407
|
-
@state = :tokens
|
408
|
-
end
|
409
|
-
|
410
|
-
__send__ STATE_HANDLERS[@state]
|
411
|
-
end
|
412
|
-
|
413
|
-
@state = :tokens
|
414
|
-
end
|
415
|
-
|
416
370
|
def process_in
|
417
371
|
nest = 1
|
418
372
|
iterations = 0
|
@@ -436,7 +390,7 @@ module SqlLexer
|
|
436
390
|
if @scanner.skip(/\(/u)
|
437
391
|
nest += 1
|
438
392
|
process_tokens
|
439
|
-
elsif @scanner.skip(TkCloseParen)
|
393
|
+
elsif @scanner.skip(/\)/u)
|
440
394
|
nest -= 1
|
441
395
|
break if nest.zero?
|
442
396
|
process_tokens
|
data/lib/sql_lexer/version.rb
CHANGED
data/too-many-sockets.md
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
We received a notification (Zendesk #280) from a customer that they were unable to SSH
|
2
|
+
into their box, and that their service provider discovered that Skylight
|
3
|
+
was triggering socket buffer limits.
|
4
|
+
|
5
|
+
The customer immediately disabled Skylight, which fixed the bug for him.
|
6
|
+
|
7
|
+
Customer Information:
|
8
|
+
|
9
|
+
* Using Skylight 0.2.3
|
10
|
+
* Ruby 2.0.0p353
|
11
|
+
* Also had New Relic installed (Bug #46)
|
12
|
+
* Recently upgraded to Rails 4
|
13
|
+
* Passenger Enterprise with a max-pool size of 12 and zero-downtime deploys
|
14
|
+
* He had a maxsockbuf of 32MB
|
15
|
+
|
16
|
+
The customer also reported 134 open sockets in the server process.
|
17
|
+
|
18
|
+
Separately, we discovered that New Relic was making our payloads
|
19
|
+
extremely large.
|
20
|
+
|
21
|
+
Our working hypothesis was that the large New Relic payloads were
|
22
|
+
triggering the max buffer condition, which was then cascading into more
|
23
|
+
failures.
|
24
|
+
|
25
|
+
Upon further investigation, we discovered a few things:
|
26
|
+
|
27
|
+
* If the client hits a kernel buffer limit (via EWOULDBLOCK that lasts
|
28
|
+
more than 5s), it closes down its socket
|
29
|
+
* The server expects to recover from this situation by getting the
|
30
|
+
client socket in its read list from IO.select. It would then try to
|
31
|
+
read from it, get an EOF, and close the socket.
|
32
|
+
* If the server does not get the client socket in its read list for some
|
33
|
+
(unknown) reason, this would result in an ever-growing list of sockets
|
34
|
+
on the server side.
|
35
|
+
|
36
|
+
We are not sure why the sockets would not appear in the read list, but
|
37
|
+
we hypothesize that when the OpenVZ limit is reached, the kernel no
|
38
|
+
longer includes the socket in the read list, causing the server to never
|
39
|
+
close the socket.
|
40
|
+
|
41
|
+
Mitigation strategies:
|
42
|
+
|
43
|
+
* The New Relic fix (#46) should reduce the likelihood of encountering
|
44
|
+
this in the first place
|
45
|
+
* We want to add a Hello heartbeat from the client. If the server
|
46
|
+
doesn't receive a message every 1m (or 2m, TBD), it will close the
|
47
|
+
socket even if it's not in the read list.
|
48
|
+
|
49
|
+
We are worried that if the server process ever gets stuck, this
|
50
|
+
condition can occur. We are also considering a server heartbeat back to
|
51
|
+
the clients, so they can take corrective action if the agent gets stuck.
|
52
|
+
|
53
|
+
Some mitigations for this situation:
|
54
|
+
|
55
|
+
* Kill the server process when a respawn is necessary. This will ensure
|
56
|
+
that a stuck agent process doesn't duplicate into N servers.
|
57
|
+
* Before connecting to the server socket, pre-check the server process
|
58
|
+
with kill -0.
|
59
|
+
|
60
|
+
For next time:
|
61
|
+
* Request skylight.log for easier debugging
|
62
|
+
* Write up best practices for customer bug reporting on the doc site
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: skylight
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tilde, Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -142,6 +142,7 @@ files:
|
|
142
142
|
- lib/sql_lexer.rb
|
143
143
|
- lib/sql_lexer/lexer.rb
|
144
144
|
- lib/sql_lexer/version.rb
|
145
|
+
- too-many-sockets.md
|
145
146
|
homepage: http://www.skylight.io
|
146
147
|
licenses: []
|
147
148
|
metadata: {}
|