skylight 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/skylight/version.rb +1 -1
- data/lib/skylight/worker/server.rb +1 -1
- data/lib/sql_lexer.rb +1 -0
- data/lib/sql_lexer/lexer.rb +4 -50
- data/lib/sql_lexer/version.rb +1 -1
- data/too-many-sockets.md +62 -0
- metadata +3 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 5ba19fdc679a43bb87c5cf95b843b05ce58878cb
         | 
| 4 | 
            +
              data.tar.gz: 7cdf7d06b0eba50a07cbea357929bcd8b8231f73
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: c59d80d86beb6ff52709bb48a3182373ae2557f901419e4603dcaac64072f6144b3c6735fc2aef5fae3fd487647f230dd6c582f67b54052961e080aba307f120
         | 
| 7 | 
            +
              data.tar.gz: 260f99fcf7b8a7b7074538e2083df7b334bbd4e1afd4b1e6874dbbaafc2616567127507e00c6bd7724c8a55c32b9cf479759d6eac0961fda6a8bbe10b02d5914
         | 
    
        data/lib/skylight/version.rb
    CHANGED
    
    
    
        data/lib/sql_lexer.rb
    CHANGED
    
    
    
        data/lib/sql_lexer/lexer.rb
    CHANGED
    
    | @@ -29,8 +29,6 @@ module SqlLexer | |
| 29 29 | 
             
                StartString   = %Q<'>
         | 
| 30 30 | 
             
                StartDigit    = %q<[\p{Digit}\.]>
         | 
| 31 31 |  | 
| 32 | 
            -
                StartSelect   = %Q<SELECT(?=(?:[#{WS}]|#{OpPart}))>
         | 
| 33 | 
            -
             | 
| 34 32 | 
             
                # Binds that are also IDs do not need to be included here, since AfterOp (which uses StartBind)
         | 
| 35 33 | 
             
                # also checks for StartAnyId
         | 
| 36 34 | 
             
                StartBind     = %Q<#{StartString}|#{StartDigit}|#{SpecialOps}>
         | 
| @@ -87,9 +85,7 @@ module SqlLexer | |
| 87 85 | 
             
                TkArray       = %r<#{ArrayOp}>iu
         | 
| 88 86 | 
             
                TkArrayIndex  = %r<#{ArrayIndexOp}>iu
         | 
| 89 87 | 
             
                TkSpecialOp   = %r<#{SpecialOps}>iu
         | 
| 90 | 
            -
                TkStartSelect = %r | 
| 91 | 
            -
                TkStartSubquery = %r<\(#{OptWS}#{StartSelect}>iu
         | 
| 92 | 
            -
                TkCloseParen  = %r<#{OptWS}\)>u
         | 
| 88 | 
            +
                TkStartSelect = %r<SELECT(?=(?:[#{WS}]|#{OpPart}))>iu
         | 
| 93 89 |  | 
| 94 90 | 
             
                STATE_HANDLERS = {
         | 
| 95 91 | 
             
                  begin:       :process_begin,
         | 
| @@ -101,7 +97,6 @@ module SqlLexer | |
| 101 97 | 
             
                  table_name:  :process_table_name,
         | 
| 102 98 | 
             
                  end:         :process_end,
         | 
| 103 99 | 
             
                  special:     :process_special,
         | 
| 104 | 
            -
                  subquery:    :process_subquery,
         | 
| 105 100 | 
             
                  in:          :process_in,
         | 
| 106 101 | 
             
                  array:       :process_array
         | 
| 107 102 | 
             
                }
         | 
| @@ -356,12 +351,8 @@ module SqlLexer | |
| 356 351 | 
             
                def process_special
         | 
| 357 352 | 
             
                  if @scanner.skip(TkIn)
         | 
| 358 353 | 
             
                    @scanner.skip(TkOptWS)
         | 
| 359 | 
            -
                     | 
| 360 | 
            -
             | 
| 361 | 
            -
                    else
         | 
| 362 | 
            -
                      @scanner.skip(/\(/u)
         | 
| 363 | 
            -
                      @state = :in
         | 
| 364 | 
            -
                    end
         | 
| 354 | 
            +
                    @scanner.skip(/\(/u)
         | 
| 355 | 
            +
                    @state = :in
         | 
| 365 356 | 
             
                  elsif @scanner.skip(TkArray)
         | 
| 366 357 | 
             
                    @scanner.skip(/\[/u)
         | 
| 367 358 | 
             
                    @state = :array
         | 
| @@ -371,48 +362,11 @@ module SqlLexer | |
| 371 362 | 
             
                    else
         | 
| 372 363 | 
             
                      @state = :end
         | 
| 373 364 | 
             
                    end
         | 
| 374 | 
            -
                  elsif @scanner.skip(TkStartSubquery)
         | 
| 375 | 
            -
                    @state = :subquery
         | 
| 376 365 | 
             
                  elsif @scanner.skip(TkArrayIndex)
         | 
| 377 366 | 
             
                    @state = :tokens
         | 
| 378 367 | 
             
                  end
         | 
| 379 368 | 
             
                end
         | 
| 380 369 |  | 
| 381 | 
            -
                def process_subquery
         | 
| 382 | 
            -
                  nest = 1
         | 
| 383 | 
            -
                  iterations = 0
         | 
| 384 | 
            -
             | 
| 385 | 
            -
                  while nest > 0
         | 
| 386 | 
            -
                    iterations += 1
         | 
| 387 | 
            -
             | 
| 388 | 
            -
                    if iterations > 10_000
         | 
| 389 | 
            -
                      raise "The SQL '#{@scanner.string}' could not be parsed because of too many iterations in subquery"
         | 
| 390 | 
            -
                    end
         | 
| 391 | 
            -
             | 
| 392 | 
            -
                    if @debug
         | 
| 393 | 
            -
                      p @state
         | 
| 394 | 
            -
                      p @scanner
         | 
| 395 | 
            -
                      p nest
         | 
| 396 | 
            -
                      p @scanner.peek(1)
         | 
| 397 | 
            -
                    end
         | 
| 398 | 
            -
             | 
| 399 | 
            -
                    if @scanner.skip(TkStartSubquery)
         | 
| 400 | 
            -
                      nest += 1
         | 
| 401 | 
            -
                      @state = :tokens
         | 
| 402 | 
            -
                    elsif @scanner.skip(TkCloseParen)
         | 
| 403 | 
            -
                      nest -= 1
         | 
| 404 | 
            -
                      break if nest.zero?
         | 
| 405 | 
            -
                      @state = :tokens
         | 
| 406 | 
            -
                    elsif @state == :subquery
         | 
| 407 | 
            -
                      @state = :tokens
         | 
| 408 | 
            -
                    end
         | 
| 409 | 
            -
             | 
| 410 | 
            -
                    __send__ STATE_HANDLERS[@state]
         | 
| 411 | 
            -
                  end
         | 
| 412 | 
            -
             | 
| 413 | 
            -
                  @state = :tokens
         | 
| 414 | 
            -
                end
         | 
| 415 | 
            -
             | 
| 416 370 | 
             
                def process_in
         | 
| 417 371 | 
             
                  nest = 1
         | 
| 418 372 | 
             
                  iterations = 0
         | 
| @@ -436,7 +390,7 @@ module SqlLexer | |
| 436 390 | 
             
                    if @scanner.skip(/\(/u)
         | 
| 437 391 | 
             
                      nest += 1
         | 
| 438 392 | 
             
                      process_tokens
         | 
| 439 | 
            -
                    elsif @scanner.skip( | 
| 393 | 
            +
                    elsif @scanner.skip(/\)/u)
         | 
| 440 394 | 
             
                      nest -= 1
         | 
| 441 395 | 
             
                      break if nest.zero?
         | 
| 442 396 | 
             
                      process_tokens
         | 
    
        data/lib/sql_lexer/version.rb
    CHANGED
    
    
    
        data/too-many-sockets.md
    ADDED
    
    | @@ -0,0 +1,62 @@ | |
| 1 | 
            +
            We received a notification (Zendesk #280) from a customer that they were unable to SSH
         | 
| 2 | 
            +
            into their box, and that their service provider discovered that Skylight
         | 
| 3 | 
            +
            was triggering socket buffer limits.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            The customer immediately disabled Skylight, which fixed the bug for him.
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            Customer Information:
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            * Using Skylight 0.2.3
         | 
| 10 | 
            +
            * Ruby 2.0.0p353
         | 
| 11 | 
            +
            * Also had New Relic installed (Bug #46)
         | 
| 12 | 
            +
            * Recently upgraded to Rails 4
         | 
| 13 | 
            +
            * Passenger Enterprise with a max-pool size of 12 and zero-downtime deploys
         | 
| 14 | 
            +
            * He had a maxsockbuf of 32MB
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            The customer also reported 134 open sockets in the server process.
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            Separately, we discovered that New Relic was making our payloads
         | 
| 19 | 
            +
            extremely large.
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            Our working hypothesis was that the large New Relic payloads were
         | 
| 22 | 
            +
            triggering the max buffer condition, which was then cascading into more
         | 
| 23 | 
            +
            failures.
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            Upon further investigation, we discovered a few things:
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            * If the client hits a kernel buffer limit (via EWOULDBLOCK that lasts
         | 
| 28 | 
            +
              more than 5s), it closes down its socket
         | 
| 29 | 
            +
            * The server expects to recover from this situation by getting the
         | 
| 30 | 
            +
              client socket in its read list from IO.select. It would then try to
         | 
| 31 | 
            +
              read from it, get an EOF, and close the socket.
         | 
| 32 | 
            +
            * If the server does not get the client socket in its read list for some
         | 
| 33 | 
            +
              (unknown) reason, this would result in an ever-growing list of sockets
         | 
| 34 | 
            +
              on the server side.
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            We are not sure why the sockets would not appear in the read list, but
         | 
| 37 | 
            +
            we hypothesize that when the OpenVZ limit is reached, the kernel no
         | 
| 38 | 
            +
            longer includes the socket in the read list, causing the server to never
         | 
| 39 | 
            +
            close the socket.
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            Mitigation strategies:
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            * The New Relic fix (#46) should reduce the likelihood of encountering
         | 
| 44 | 
            +
              this in the first place
         | 
| 45 | 
            +
            * We want to add a Hello heartbeat from the client. If the server
         | 
| 46 | 
            +
              doesn't receive a message every 1m (or 2m, TBD), it will close the
         | 
| 47 | 
            +
              socket even if it's not in the read list.
         | 
| 48 | 
            +
             | 
| 49 | 
            +
            We are worried that if the server process ever gets stuck, this
         | 
| 50 | 
            +
            condition can occur. We are also considering a server heartbeat back to
         | 
| 51 | 
            +
            the clients, so they can take corrective action if the agent gets stuck.
         | 
| 52 | 
            +
             | 
| 53 | 
            +
            Some mitigations for this situation:
         | 
| 54 | 
            +
             | 
| 55 | 
            +
            * Kill the server process when a respawn is necessary. This will ensure
         | 
| 56 | 
            +
              that a stuck agent process doesn't duplicate into N servers.
         | 
| 57 | 
            +
            * Before connecting to the server socket, pre-check the server process 
         | 
| 58 | 
            +
              with kill -0.
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            For next time: 
         | 
| 61 | 
            +
            * Request skylight.log for easier debugging
         | 
| 62 | 
            +
            * Write up best practices for customer bug reporting on the doc site
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: skylight
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.2.4 | 
| 4 | 
            +
              version: 0.2.5
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Tilde, Inc.
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2014-02- | 
| 11 | 
            +
            date: 2014-02-22 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: activesupport
         | 
| @@ -142,6 +142,7 @@ files: | |
| 142 142 | 
             
            - lib/sql_lexer.rb
         | 
| 143 143 | 
             
            - lib/sql_lexer/lexer.rb
         | 
| 144 144 | 
             
            - lib/sql_lexer/version.rb
         | 
| 145 | 
            +
            - too-many-sockets.md
         | 
| 145 146 | 
             
            homepage: http://www.skylight.io
         | 
| 146 147 | 
             
            licenses: []
         | 
| 147 148 | 
             
            metadata: {}
         |