atatus 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Gemfile +72 -22
- data/LICENSE +1 -1
- data/atatus.gemspec +2 -2
- data/lib/atatus.rb +76 -16
- data/lib/atatus/agent.rb +78 -29
- data/lib/atatus/central_config.rb +72 -27
- data/lib/atatus/central_config/cache_control.rb +18 -1
- data/lib/atatus/child_durations.rb +64 -0
- data/lib/atatus/collector/base.rb +61 -29
- data/lib/atatus/collector/builder.rb +46 -2
- data/lib/atatus/collector/hist.rb +54 -0
- data/lib/atatus/collector/transport.rb +41 -11
- data/lib/atatus/config.rb +129 -28
- data/lib/atatus/config/bytes.rb +17 -0
- data/lib/atatus/config/duration.rb +17 -0
- data/lib/atatus/config/options.rb +29 -9
- data/lib/atatus/config/regexp_list.rb +17 -0
- data/lib/atatus/config/wildcard_pattern_list.rb +64 -0
- data/lib/atatus/context.rb +32 -1
- data/lib/atatus/context/request.rb +17 -0
- data/lib/atatus/context/request/socket.rb +18 -1
- data/lib/atatus/context/request/url.rb +17 -0
- data/lib/atatus/context/response.rb +27 -2
- data/lib/atatus/context/user.rb +17 -0
- data/lib/atatus/context_builder.rb +19 -4
- data/lib/atatus/deprecations.rb +17 -0
- data/lib/atatus/error.rb +27 -0
- data/lib/atatus/error/exception.rb +24 -0
- data/lib/atatus/error/log.rb +17 -0
- data/lib/atatus/error_builder.rb +17 -2
- data/lib/atatus/grape.rb +62 -0
- data/lib/atatus/graphql.rb +91 -0
- data/lib/atatus/grpc.rb +99 -0
- data/lib/atatus/instrumenter.rb +135 -30
- data/lib/atatus/internal_error.rb +17 -0
- data/lib/atatus/logging.rb +17 -2
- data/lib/atatus/metadata.rb +17 -0
- data/lib/atatus/metadata/process_info.rb +17 -0
- data/lib/atatus/metadata/service_info.rb +21 -6
- data/lib/atatus/metadata/system_info.rb +22 -3
- data/lib/atatus/metadata/system_info/container_info.rb +49 -10
- data/lib/atatus/metadata/system_info/hw_info.rb +1 -1
- data/lib/atatus/metrics.rb +69 -27
- data/lib/atatus/metrics/breakdown_set.rb +31 -0
- data/lib/atatus/metrics/{cpu_mem.rb → cpu_mem_set.rb} +110 -63
- data/lib/atatus/metrics/metric.rb +140 -0
- data/lib/atatus/metrics/set.rb +123 -0
- data/lib/atatus/metrics/span_scoped_set.rb +56 -0
- data/lib/atatus/metrics/transaction_set.rb +26 -0
- data/lib/atatus/metrics/vm_set.rb +58 -0
- data/lib/atatus/metricset.rb +48 -4
- data/lib/atatus/middleware.rb +28 -8
- data/lib/atatus/naively_hashable.rb +17 -0
- data/lib/atatus/normalizers.rb +23 -9
- data/lib/atatus/normalizers/grape.rb +22 -0
- data/lib/atatus/normalizers/grape/endpoint_run.rb +65 -0
- data/lib/atatus/normalizers/rails.rb +27 -0
- data/lib/atatus/normalizers/rails/action_controller.rb +44 -0
- data/lib/atatus/normalizers/rails/action_mailer.rb +43 -0
- data/lib/atatus/normalizers/{action_view.rb → rails/action_view.rb} +17 -0
- data/lib/atatus/normalizers/rails/active_record.rb +80 -0
- data/lib/atatus/opentracing.rb +75 -42
- data/lib/atatus/rails.rb +29 -13
- data/lib/atatus/railtie.rb +19 -6
- data/lib/atatus/resque.rb +29 -0
- data/lib/atatus/sinatra.rb +53 -0
- data/lib/atatus/span.rb +44 -15
- data/lib/atatus/span/context.rb +43 -28
- data/lib/atatus/span/context/db.rb +43 -0
- data/lib/atatus/span/context/destination.rb +77 -0
- data/lib/atatus/span/context/http.rb +43 -0
- data/lib/atatus/span_helpers.rb +18 -1
- data/lib/atatus/spies.rb +33 -15
- data/lib/atatus/spies/action_dispatch.rb +27 -6
- data/lib/atatus/spies/delayed_job.rb +26 -5
- data/lib/atatus/spies/dynamo_db.rb +62 -0
- data/lib/atatus/spies/elasticsearch.rb +53 -7
- data/lib/atatus/spies/faraday.rb +54 -20
- data/lib/atatus/spies/http.rb +36 -6
- data/lib/atatus/spies/json.rb +18 -0
- data/lib/atatus/spies/mongo.rb +41 -10
- data/lib/atatus/spies/net_http.rb +52 -11
- data/lib/atatus/spies/rake.rb +42 -23
- data/lib/atatus/spies/redis.rb +17 -0
- data/lib/atatus/spies/resque.rb +57 -0
- data/lib/atatus/spies/sequel.rb +54 -17
- data/lib/atatus/spies/shoryuken.rb +69 -0
- data/lib/atatus/spies/sidekiq.rb +46 -25
- data/lib/atatus/spies/sinatra.rb +20 -4
- data/lib/atatus/spies/sneakers.rb +74 -0
- data/lib/atatus/spies/sucker_punch.rb +58 -0
- data/lib/atatus/spies/tilt.rb +20 -1
- data/lib/atatus/sql.rb +36 -0
- data/lib/atatus/sql/signature.rb +169 -0
- data/lib/atatus/sql/tokenizer.rb +264 -0
- data/lib/atatus/sql/tokens.rb +63 -0
- data/lib/atatus/sql_summarizer.rb +24 -6
- data/lib/atatus/stacktrace.rb +17 -0
- data/lib/atatus/stacktrace/frame.rb +17 -3
- data/lib/atatus/stacktrace_builder.rb +23 -3
- data/lib/atatus/subscriber.rb +23 -4
- data/lib/atatus/trace_context.rb +84 -51
- data/lib/atatus/trace_context/traceparent.rb +111 -0
- data/lib/atatus/trace_context/tracestate.rb +148 -0
- data/lib/atatus/transaction.rb +74 -18
- data/lib/atatus/transport/base.rb +44 -27
- data/lib/atatus/transport/connection.rb +28 -72
- data/lib/atatus/transport/connection/http.rb +58 -35
- data/lib/atatus/transport/connection/proxy_pipe.rb +24 -5
- data/lib/atatus/transport/filters.rb +18 -1
- data/lib/atatus/transport/filters/hash_sanitizer.rb +77 -0
- data/lib/atatus/transport/filters/secrets_filter.rb +30 -55
- data/lib/atatus/transport/headers.rb +83 -0
- data/lib/atatus/transport/serializers.rb +17 -5
- data/lib/atatus/transport/serializers/context_serializer.rb +30 -3
- data/lib/atatus/transport/serializers/error_serializer.rb +17 -2
- data/lib/atatus/transport/serializers/metadata_serializer.rb +44 -22
- data/lib/atatus/transport/serializers/metricset_serializer.rb +34 -6
- data/lib/atatus/transport/serializers/span_serializer.rb +47 -12
- data/lib/atatus/transport/serializers/transaction_serializer.rb +18 -2
- data/lib/atatus/transport/user_agent.rb +48 -0
- data/lib/atatus/transport/worker.rb +31 -7
- data/lib/atatus/util.rb +18 -1
- data/lib/atatus/util/inflector.rb +17 -0
- data/lib/atatus/util/lru_cache.rb +17 -0
- data/lib/atatus/util/throttle.rb +17 -0
- data/lib/atatus/version.rb +19 -1
- metadata +46 -26
- data/Rakefile +0 -19
- data/bench/.gitignore +0 -2
- data/bench/app.rb +0 -53
- data/bench/benchmark.rb +0 -36
- data/bench/report.rb +0 -55
- data/bench/rubyprof.rb +0 -39
- data/bench/stackprof.rb +0 -23
- data/bin/build_docs +0 -5
- data/bin/console +0 -15
- data/bin/setup +0 -8
- data/bin/with_framework +0 -7
- data/lib/atatus/metrics/vm.rb +0 -60
- data/lib/atatus/normalizers/action_controller.rb +0 -27
- data/lib/atatus/normalizers/action_mailer.rb +0 -26
- data/lib/atatus/normalizers/active_record.rb +0 -45
- data/lib/atatus/util/prefixed_logger.rb +0 -18
- data/vendor/.gitkeep +0 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# frozen_string_literal: true
|
|
19
|
+
|
|
20
|
+
module Atatus
|
|
21
|
+
# @api private
|
|
22
|
+
module Spies
|
|
23
|
+
# @api private
|
|
24
|
+
# Spy that wraps SuckerPunch job execution in an Atatus transaction.
class SuckerPunchSpy
  TYPE = 'sucker_punch'

  # Patches ::SuckerPunch::Job::ClassMethods#__run_perform, keeping the
  # original reachable as __run_perform_without_atatus. Does nothing when
  # SuckerPunch (or its Job::ClassMethods module) is not loaded.
  def install
    return unless defined?(::SuckerPunch) &&
                  defined?(::SuckerPunch::Job) &&
                  defined?(::SuckerPunch::Job::ClassMethods)

    ::SuckerPunch::Job::ClassMethods.class_eval do
      alias :__run_perform_without_atatus :__run_perform

      # Runs the original __run_perform inside a transaction named after the
      # receiver (`to_s`) with type TYPE. Any exception is reported as
      # unhandled and re-raised; the transaction is always ended.
      def __run_perform(*args)
        # This method is reached via JobClass#async_perform
        # or JobClass#perform_in.
        name = to_s
        transaction = Atatus.start_transaction(name, TYPE)
        __run_perform_without_atatus(*args)
        # `transaction` is nil-guarded: presumably start_transaction returns
        # nil when no agent is running (verify against Atatus.start_transaction).
        transaction&.done('success')
      rescue ::Exception => e
        # Deliberately broad: the exception is re-raised below, never swallowed.
        #
        # Note that SuckerPunch by default doesn't raise the errors from
        # the user-defined JobClass#perform method as it uses an error
        # handler, accessed via `SuckerPunch.exception_handler`.
        Atatus.report(e, handled: false)
        # `transaction` is also nil here if start_transaction itself raised;
        # guarding keeps the original error from being masked by NoMethodError.
        transaction&.done('error')
        raise
      ensure
        Atatus.end_transaction
      end
    end
  end
end
|
|
55
|
+
|
|
56
|
+
register 'SuckerPunch', 'sucker_punch', SuckerPunchSpy.new
|
|
57
|
+
end
|
|
58
|
+
end
|
data/lib/atatus/spies/tilt.rb
CHANGED
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
1
18
|
# frozen_string_literal: true
|
|
2
19
|
|
|
3
20
|
module Atatus
|
|
@@ -8,7 +25,8 @@ module Atatus
|
|
|
8
25
|
TYPE = 'template.tilt'
|
|
9
26
|
|
|
10
27
|
def install
|
|
11
|
-
if defined?(::Tilt) && defined?(::Tilt::Template)
|
|
28
|
+
if defined?(::Tilt) && defined?(::Tilt::Template)
|
|
29
|
+
|
|
12
30
|
::Tilt::Template.class_eval do
|
|
13
31
|
alias render_without_apm render
|
|
14
32
|
|
|
@@ -20,6 +38,7 @@ module Atatus
|
|
|
20
38
|
end
|
|
21
39
|
end
|
|
22
40
|
end
|
|
41
|
+
|
|
23
42
|
end
|
|
24
43
|
end
|
|
25
44
|
end
|
data/lib/atatus/sql.rb
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# frozen_string_literal: true
|
|
19
|
+
|
|
20
|
+
module Atatus
|
|
21
|
+
# @api private
|
|
22
|
+
module Sql
|
|
23
|
+
# This method is only here as a shortcut while the agent ships with
|
|
24
|
+
# both implementations ~mikker
|
|
25
|
+
# Returns the memoized SQL summarizer.
#
# On first use, picks the implementation from the agent config: the legacy
# SqlSummarizer when `use_legacy_sql_parser` is truthy, otherwise
# Sql::Signature::Summarizer (also used when no agent/config is available).
# The implementation file is required lazily, only for the chosen branch.
def self.summarizer
  return @summarizer if @summarizer

  legacy = Atatus.agent&.config&.use_legacy_sql_parser

  @summarizer =
    if legacy
      require 'atatus/sql_summarizer'
      SqlSummarizer.new
    else
      require 'atatus/sql/signature'
      Sql::Signature::Summarizer.new
    end
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# frozen_string_literal: true
|
|
19
|
+
|
|
20
|
+
require 'atatus/sql/tokenizer'
|
|
21
|
+
|
|
22
|
+
module Atatus
|
|
23
|
+
module Sql
|
|
24
|
+
# @api private
|
|
25
|
+
class Signature
|
|
26
|
+
include Tokens
|
|
27
|
+
|
|
28
|
+
# Mostly here to provide a similar API to new SqlSummarizer for easier
|
|
29
|
+
# swapping out
|
|
30
|
+
#
|
|
31
|
+
# @api private
|
|
32
|
+
# Thin adapter exposing Signature through a `#summarize` method.
class Summarizer
  # @param sql [String] the SQL statement to summarize
  # @return [String] the signature produced by Signature#parse
  def summarize(sql)
    Signature.new(sql).parse
  end
end
|
|
37
|
+
|
|
38
|
+
# @param sql [String] the SQL statement; kept for the first-word fallback in
#   #parse and handed to a new Tokenizer
def initialize(sql)
  @tokenizer = Tokenizer.new(sql)
  @sql = sql
end
|
|
42
|
+
|
|
43
|
+
# Computes the signature of the SQL statement.
#
# Leading comments are skipped so that the statement keyword is the token
# parse_tokens dispatches on (previously a query starting with a comment
# always fell through to the first-word fallback).
#
# @return [String] the signature from parse_tokens, or the upcased first
#   whitespace-separated word of the query, or '' for a blank query
def parse
  # Advance to the first token that is not a comment.
  while @tokenizer.scan
    break unless @tokenizer.token == COMMENT
  end

  parsed = parse_tokens
  return parsed if parsed

  # If all else fails, just return the first token of the query.
  first_word = @sql.split.first
  first_word ? first_word.upcase : ''
end
|
|
55
|
+
|
|
56
|
+
# Convenience entry point: builds an instance for `sql` and returns the
# result of its #parse.
def self.parse(sql)
  signature = new(sql)
  signature.parse
end
|
|
59
|
+
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
|
63
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
|
64
|
+
# Builds the signature for the statement whose first token is the
# tokenizer's current token.
#
# - CALL            -> "CALL <first identifier found>"
# - DELETE          -> "DELETE FROM <table>"
# - INSERT/REPLACE  -> "INSERT INTO <table>" / "REPLACE INTO <table>"
# - SELECT          -> "SELECT FROM <table>" for the first FROM that is not
#                      nested inside parentheses
# - UPDATE          -> "UPDATE <table>", or just "UPDATE" when no identifier
#                      follows
#
# The INSERT/REPLACE keyword is upcased so lower-case queries produce the same
# signature as upper-case ones, matching the other branches which always emit
# upper-case keywords.
#
# @return [String, nil] the signature, or nil when the statement kind is not
#   handled or the expected tokens are not found
def parse_tokens
  t = @tokenizer

  case t.token

  when CALL
    return unless scan_until IDENT
    "CALL #{t.text}"

  when DELETE
    return unless scan_until FROM
    return unless scan_token IDENT
    table = scan_dotted_identifier
    "DELETE FROM #{table}"

  when INSERT, REPLACE
    action = t.text.upcase
    return unless scan_until INTO
    return unless scan_token IDENT
    table = scan_dotted_identifier
    "#{action} INTO #{table}"

  when SELECT
    # Parenthesis depth; a FROM inside parentheses belongs to a subquery or
    # function call and is ignored.
    level = 0
    while t.scan
      case t.token
      when LPAREN then level += 1
      when RPAREN then level -= 1
      when FROM
        next unless level == 0
        break unless scan_token IDENT
        table = scan_dotted_identifier
        return "SELECT FROM #{table}"
      end
    end

  when UPDATE
    # Scan for the table name. Some dialects allow option keywords before
    # the table name.
    return 'UPDATE' unless scan_token IDENT

    table = t.text

    period = false        # last token was a period, next IDENT is appended
    first_period = false  # a period has been seen at least once

    while t.scan
      case t.token
      when IDENT
        if period
          table += t.text
          period = false
        end

        # Before any period, a later identifier replaces the earlier one
        # (the earlier one is taken to be an option keyword).
        unless first_period
          table = t.text
        end

        # Two adjacent identifiers found after the first period. Ignore
        # the secondary ones, in case they are unknown keywords.
      when PERIOD
        period = true
        first_period = true
        table += '.'
      else
        return "UPDATE #{table}"
      end
    end
  end
end
|
|
134
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
|
135
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
|
136
|
+
|
|
137
|
+
# Scans until finding token of `kind`
|
|
138
|
+
# Advances the tokenizer until the current token is of `kind`.
#
# @param kind the token constant to look for
# @return [Boolean] true as soon as a token of `kind` is scanned, false when
#   the input is exhausted first
def scan_until(kind)
  while @tokenizer.scan
    return true if @tokenizer.token == kind
  end

  false
end
|
|
144
|
+
|
|
145
|
+
# Scans next token, ignoring comments
|
|
146
|
+
# Returns whether next token is of `kind`
|
|
147
|
+
# Scans the next token, ignoring comments, and reports whether it is of
# `kind`.
#
# When the input is exhausted before a non-comment token is found, this
# returns false rather than comparing `kind` against whatever token was
# current before the call.
#
# @param kind the token constant expected next
# @return [Boolean]
def scan_token(kind)
  while @tokenizer.scan
    next if @tokenizer.token == COMMENT

    return @tokenizer.token == kind
  end

  false
end
|
|
157
|
+
|
|
158
|
+
# Reads a possibly dotted identifier (e.g. `schema.table`) starting at the
# tokenizer's current token.
#
# Keeps consuming `PERIOD IDENT` pairs (comments are ignored by scan_token)
# and joins the identifier texts with '.'.
#
# @return [String] the dotted identifier
def scan_dotted_identifier
  parts = [@tokenizer.text]
  parts << @tokenizer.text while scan_token(PERIOD) && scan_token(IDENT)
  parts.join('.')
end
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# Licensed to Elasticsearch B.V. under one or more contributor
|
|
2
|
+
# license agreements. See the NOTICE file distributed with
|
|
3
|
+
# this work for additional information regarding copyright
|
|
4
|
+
# ownership. Elasticsearch B.V. licenses this file to you under
|
|
5
|
+
# the Apache License, Version 2.0 (the "License"); you may
|
|
6
|
+
# not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# frozen_string_literal: true
|
|
19
|
+
|
|
20
|
+
require 'strscan'
|
|
21
|
+
require 'atatus/sql/tokens'
|
|
22
|
+
|
|
23
|
+
module Atatus
|
|
24
|
+
module Sql
|
|
25
|
+
# @api private
|
|
26
|
+
class Tokenizer
|
|
27
|
+
include Tokens
|
|
28
|
+
|
|
29
|
+
ALPHA = /[[:alpha:]]/.freeze
|
|
30
|
+
DIGIT = /[[:digit:]]/.freeze
|
|
31
|
+
SPACE = /[[:space:]]+/.freeze
|
|
32
|
+
|
|
33
|
+
# @param input [String] the SQL text to tokenize; scanned with a
#   StringScanner, with the current token's start offset beginning at 0
def initialize(input)
  @byte_start = 0
  @input = input
  @scanner = StringScanner.new(@input)
end
|
|
39
|
+
|
|
40
|
+
attr_reader :input, :scanner, :token
|
|
41
|
+
|
|
42
|
+
# Text of the current token: the bytes of the input between the recorded
# start and end byte offsets.
#
# @return [String]
def text
  byte_length = @byte_end - @byte_start
  @input.byteslice(@byte_start, byte_length)
end
|
|
45
|
+
|
|
46
|
+
# Advances to the next token.
#
# Skips leading whitespace, records where the token starts, then classifies
# it from its first character and stores the result in `token`.
#
# @return [Boolean] false when the input is exhausted, true otherwise
def scan
  scanner.skip(SPACE)
  @byte_start = scanner.pos

  first = next_char
  if first
    @token = next_token(first)
    true
  else
    false
  end
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
|
62
|
+
# Classifies the token that starts with `char` (already consumed),
# delegating to the specialised scanners for multi-character tokens.
#
# @param char [String] the first character of the token
# @return the token constant for the scanned token
def next_token(char)
  case char
  when '.' then PERIOD
  when '(' then LPAREN
  when ')' then RPAREN
  when '$' then scan_dollar_sign
  # Backtick and double-quote identifiers close with the same character...
  when '`', '"' then scan_quoted_indentifier(char)
  # ...bracketed identifiers close with `]`.
  when '[' then scan_quoted_indentifier(']')
  when '/' then scan_bracketed_comment
  when '-' then scan_simple_comment
  when "'" then scan_string_literal
  # A leading underscore can never start a keyword.
  when '_' then scan_keyword_or_identifier(possible_keyword: false)
  when ALPHA then scan_keyword_or_identifier(possible_keyword: true)
  when DIGIT then scan_numeric_literal
  else OTHER
  end
end
|
|
80
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
|
81
|
+
|
|
82
|
+
# Consumes one character from the scanner and moves the current token's end
# offset to the scanner position.
#
# @return [String, nil] the consumed character, or nil at end of input
def next_char
  @scanner.getch.tap { @byte_end = @scanner.pos }
end
|
|
87
|
+
|
|
88
|
+
# StringScanner#peek returns next byte which could be an incomplete utf
|
|
89
|
+
# multi-byte character
|
|
90
|
+
# Returns the next character without consuming it.
#
# StringScanner#peek works on bytes, so the peeked window is widened one
# byte at a time until it forms a validly encoded string. The window never
# exceeds 4 bytes, the maximum length of a UTF-8 encoded character
# (https://tools.ietf.org/html/rfc3629).
#
# @param length [Integer] number of bytes to start peeking with
# @return [String, nil] the next character, or nil at end of input or when
#   no valid character is found within 4 bytes
def peek_char(length = 1)
  length.upto(4) do |size|
    candidate = @scanner.peek(size)

    return nil if candidate.empty?
    return candidate if candidate.valid_encoding?
  end

  nil
end
|
|
104
|
+
|
|
105
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
106
|
+
# Scans the rest of a word and classifies it as a keyword or an identifier.
#
# Consumes letters, digits, `_` and `$`. A word containing a digit, `_` or
# `$` can no longer be a keyword. Otherwise the word is looked up
# case-insensitively among the keywords of the same length.
#
# @param possible_keyword [Boolean] whether the word may still be a keyword
# @return the matching keyword token, or IDENT
def scan_keyword_or_identifier(possible_keyword:)
  while (peek = peek_char)
    if peek == '_' || peek == '$' || peek =~ DIGIT
      possible_keyword = false
    elsif peek !~ ALPHA
      break
    end

    next_char
  end

  return IDENT unless possible_keyword

  word = text
  return IDENT unless word.length.between?(KEYWORD_MIN_LENGTH, KEYWORD_MAX_LENGTH)

  upcased = word.upcase
  KEYWORDS[word.length].find { |kw| upcased == kw.to_s } || IDENT
end
|
|
131
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
132
|
+
|
|
133
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
134
|
+
# Scans a token that starts with `$` (the `$` itself is already consumed).
#
# - Digits after the `$` are consumed as part of the token.
# - `$`, `_`, a letter or whitespace starts the search for a dollar-quoted
#   string: once the opening tag (`$$` or `$tag$`) is complete, the rest of
#   the input is searched for the same tag and everything up to and
#   including it becomes one STRING token.
# - Whitespace before the opening tag is complete ends the token as OTHER.
#
# All offsets here are byte offsets (`scanner.pos`, `@byte_end`), so the
# remaining input is taken with `byteslice` and the character index returned
# by `String#index` is converted to a byte count. Using character indices
# directly mis-positions the scanner when the query contains multibyte
# characters.
#
# @return STRING for a complete dollar-quoted string, otherwise OTHER
def scan_dollar_sign
  while (peek = peek_char)
    case peek
    when DIGIT
      next_char while peek_char =~ DIGIT
    when '$', '_', ALPHA, SPACE
      # PostgreSQL supports dollar-quoted string literal syntax,
      # like $foo$...$foo$. The tag (foo in this case) is optional,
      # and if present follows identifier rules.
      while (char = next_char)
        case char
        when '$'
          # This marks the end of the initial $foo$.
          snap = text
          rest = input.byteslice(scanner.pos, input.bytesize)
          index = rest.index(snap)
          next unless index

          # `index` counts characters; convert the skipped prefix to bytes.
          delta = rest[0, index].bytesize + snap.bytesize
          @byte_end += delta
          scanner.pos += delta
          return STRING
        when SPACE
          # Unknown token starting with $, consume chars until space.
          @byte_end -= char.bytesize
          return OTHER
        end
      end
    else break
    end
  end

  OTHER
end
|
|
168
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
|
169
|
+
|
|
170
|
+
# Scans a quoted identifier whose opening quote is already consumed.
#
# Consumes up to and including the closing `delimiter`. Inside a
# double-quoted identifier a doubled `""` is an escaped quote and does not
# close it. On success the token bounds are narrowed so `text` excludes the
# surrounding quotes.
#
# @param delimiter [String] the closing quote character
# @return IDENT, or OTHER when the input ends before the closing delimiter
#   (previously this raised NoMethodError on nil)
def scan_quoted_indentifier(delimiter)
  while (char = next_char)
    next unless char == delimiter

    if delimiter == '"' && peek_char == delimiter
      next_char
      next
    end

    break
  end

  # `char` is nil only when the loop ran out of input: unterminated quote.
  return OTHER unless char

  # Remove quotes from identifier
  @byte_start += char.bytesize
  @byte_end -= char.bytesize

  IDENT
end
|
|
187
|
+
|
|
188
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
|
189
|
+
# Scans a `/* ... */` comment whose leading `/` is already consumed.
# Nested comments are supported.
#
# The `*` of the opening marker is consumed before the body is scanned, so
# that it cannot pair with a following `/` (input `/*/` is an unterminated
# comment, not a closed one).
#
# @return COMMENT for a complete comment; OTHER when the `/` is not followed
#   by `*`, or when the input ends before the comment is closed
def scan_bracketed_comment
  return OTHER unless peek_char == '*'

  next_char # consume the `*` of the opening `/*`
  nesting = 1

  while (char = next_char)
    case char
    when '/'
      next unless peek_char == '*'
      next_char
      nesting += 1
    when '*'
      next unless peek_char == '/'
      next_char
      nesting -= 1
      return COMMENT if nesting == 0
    end
  end

  # Ran out of input with the comment still open.
  OTHER
end
|
|
208
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
|
209
|
+
|
|
210
|
+
# Scans a `-- ...` comment whose first `-` is already consumed.
#
# The comment runs to the end of the line (the newline is consumed) or to
# the end of the input.
#
# @return COMMENT, or OTHER when the `-` is not followed by another `-`
def scan_simple_comment
  if peek_char == '-'
    loop do
      consumed = next_char
      break if consumed.nil? || consumed == "\n"
    end

    COMMENT
  else
    OTHER
  end
end
|
|
219
|
+
|
|
220
|
+
# Scans a single-quoted string literal whose opening quote is already
# consumed.
#
# A backslash escapes the following character and a doubled `''` is an
# escaped quote; neither closes the literal.
#
# @return STRING for a complete literal, OTHER when the input ends before
#   the closing quote (previously nil, leaving the tokenizer without a token)
def scan_string_literal
  delimiter = "'"

  while (char = next_char)
    if char == '\\'
      # Skip escaped character, e.g. 'what\'s up?'
      next_char
      next
    end

    next unless char == delimiter

    # A quote not followed by another quote (or at end of input) closes the
    # literal.
    return STRING unless peek_char == delimiter

    next_char
  end

  # Ran out of input with the literal still open.
  OTHER
end
|
|
238
|
+
|
|
239
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
|
240
|
+
# Scans the rest of a numeric literal whose first digit is already consumed.
#
# Accepts digits, at most one `.`, and at most one exponent marker (`e`/`E`)
# optionally followed by a sign. A second `.` or a second exponent marker
# ends the literal without being consumed.
#
# @return NUMBER
def scan_numeric_literal
  period = false
  exponent = false

  while (peek = peek_char)
    case peek
    when DIGIT then next_char
    when '.'
      return NUMBER if period
      next_char
      period = true
    when 'e', 'E'
      return NUMBER if exponent
      next_char
      next_char if peek_char =~ /[+-]/
      # Remember the exponent so a second marker terminates the literal.
      exponent = true
    else break
    end
  end

  NUMBER
end
|
|
261
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
|
262
|
+
end
|
|
263
|
+
end
|
|
264
|
+
end
|