elastic-apm 3.3.0 → 3.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (169) hide show
  1. checksums.yaml +4 -4
  2. data/.ci/.jenkins_exclude.yml +43 -24
  3. data/.ci/.jenkins_framework.yml +2 -2
  4. data/.ci/.jenkins_ruby.yml +1 -1
  5. data/.ci/Jenkinsfile +288 -170
  6. data/.ci/docker/jruby/11-jdk/Dockerfile +40 -0
  7. data/.ci/docker/jruby/12-jdk/Dockerfile +40 -0
  8. data/.ci/docker/jruby/13-jdk/Dockerfile +40 -0
  9. data/.ci/docker/jruby/7-jdk/Dockerfile +40 -0
  10. data/.ci/docker/jruby/8-jdk/Dockerfile +40 -0
  11. data/.ci/docker/jruby/README.md +31 -0
  12. data/.ci/docker/jruby/run.sh +73 -0
  13. data/.ci/docker/jruby/test.sh +13 -0
  14. data/.ci/jobs/apm-agent-ruby-downstream.yml +1 -0
  15. data/.ci/jobs/apm-agent-ruby-linting-mbp.yml +1 -0
  16. data/.ci/jobs/apm-agent-ruby-mbp.yml +1 -0
  17. data/.ci/jobs/defaults.yml +1 -1
  18. data/.ci/packer_cache.sh +16 -0
  19. data/.github/ISSUE_TEMPLATE/Bug_report.md +40 -0
  20. data/.github/ISSUE_TEMPLATE/Feature_request.md +17 -0
  21. data/.github/PULL_REQUEST_TEMPLATE.md +60 -0
  22. data/.gitignore +8 -0
  23. data/.rspec +0 -1
  24. data/.rubocop.yml +18 -0
  25. data/CHANGELOG.asciidoc +104 -1
  26. data/CONTRIBUTING.md +6 -48
  27. data/Gemfile +38 -10
  28. data/README.md +62 -13
  29. data/Rakefile +37 -5
  30. data/bench/sql.rb +49 -0
  31. data/bin/build_docs +1 -1
  32. data/bin/run-tests +4 -1
  33. data/docker-compose.yml +7 -0
  34. data/docs/api.asciidoc +52 -3
  35. data/docs/configuration.asciidoc +171 -22
  36. data/docs/getting-started-rails.asciidoc +2 -0
  37. data/docs/graphql.asciidoc +23 -0
  38. data/docs/images/dynamic-config.svg +1 -0
  39. data/docs/index.asciidoc +6 -1
  40. data/docs/introduction.asciidoc +2 -1
  41. data/docs/performance-tuning.asciidoc +106 -0
  42. data/docs/set-up.asciidoc +5 -2
  43. data/docs/supported-technologies.asciidoc +86 -1
  44. data/docs/upgrading.asciidoc +45 -0
  45. data/elastic-apm.gemspec +17 -0
  46. data/lib/elastic-apm.rb +17 -0
  47. data/lib/elastic_apm.rb +58 -5
  48. data/lib/elastic_apm/agent.rb +55 -4
  49. data/lib/elastic_apm/central_config.rb +27 -8
  50. data/lib/elastic_apm/central_config/cache_control.rb +17 -0
  51. data/lib/elastic_apm/child_durations.rb +26 -4
  52. data/lib/elastic_apm/config.rb +86 -5
  53. data/lib/elastic_apm/config/bytes.rb +17 -0
  54. data/lib/elastic_apm/config/duration.rb +17 -0
  55. data/lib/elastic_apm/config/options.rb +21 -5
  56. data/lib/elastic_apm/config/regexp_list.rb +17 -0
  57. data/lib/elastic_apm/config/wildcard_pattern_list.rb +20 -1
  58. data/lib/elastic_apm/context.rb +17 -0
  59. data/lib/elastic_apm/context/request.rb +17 -0
  60. data/lib/elastic_apm/context/request/socket.rb +17 -0
  61. data/lib/elastic_apm/context/request/url.rb +17 -0
  62. data/lib/elastic_apm/context/response.rb +27 -2
  63. data/lib/elastic_apm/context/user.rb +17 -0
  64. data/lib/elastic_apm/context_builder.rb +17 -0
  65. data/lib/elastic_apm/deprecations.rb +17 -0
  66. data/lib/elastic_apm/error.rb +17 -0
  67. data/lib/elastic_apm/error/exception.rb +17 -0
  68. data/lib/elastic_apm/error/log.rb +17 -0
  69. data/lib/elastic_apm/error_builder.rb +17 -0
  70. data/lib/elastic_apm/grape.rb +17 -0
  71. data/lib/elastic_apm/graphql.rb +91 -0
  72. data/lib/elastic_apm/grpc.rb +99 -0
  73. data/lib/elastic_apm/instrumenter.rb +49 -15
  74. data/lib/elastic_apm/internal_error.rb +17 -0
  75. data/lib/elastic_apm/logging.rb +17 -0
  76. data/lib/elastic_apm/metadata.rb +17 -0
  77. data/lib/elastic_apm/metadata/process_info.rb +17 -0
  78. data/lib/elastic_apm/metadata/service_info.rb +22 -2
  79. data/lib/elastic_apm/metadata/system_info.rb +17 -0
  80. data/lib/elastic_apm/metadata/system_info/container_info.rb +17 -0
  81. data/lib/elastic_apm/metrics.rb +33 -1
  82. data/lib/elastic_apm/metrics/breakdown_set.rb +17 -0
  83. data/lib/elastic_apm/metrics/cpu_mem_set.rb +17 -0
  84. data/lib/elastic_apm/metrics/metric.rb +23 -4
  85. data/lib/elastic_apm/metrics/set.rb +17 -0
  86. data/lib/elastic_apm/metrics/span_scoped_set.rb +17 -0
  87. data/lib/elastic_apm/metrics/transaction_set.rb +17 -0
  88. data/lib/elastic_apm/metrics/vm_set.rb +17 -0
  89. data/lib/elastic_apm/metricset.rb +17 -0
  90. data/lib/elastic_apm/middleware.rb +20 -4
  91. data/lib/elastic_apm/naively_hashable.rb +17 -0
  92. data/lib/elastic_apm/normalizers.rb +17 -0
  93. data/lib/elastic_apm/normalizers/grape.rb +17 -0
  94. data/lib/elastic_apm/normalizers/grape/endpoint_run.rb +18 -1
  95. data/lib/elastic_apm/normalizers/rails.rb +17 -0
  96. data/lib/elastic_apm/normalizers/rails/action_controller.rb +17 -0
  97. data/lib/elastic_apm/normalizers/rails/action_mailer.rb +17 -0
  98. data/lib/elastic_apm/normalizers/rails/action_view.rb +17 -0
  99. data/lib/elastic_apm/normalizers/rails/active_record.rb +29 -5
  100. data/lib/elastic_apm/opentracing.rb +66 -24
  101. data/lib/elastic_apm/rails.rb +18 -10
  102. data/lib/elastic_apm/railtie.rb +18 -1
  103. data/lib/elastic_apm/resque.rb +29 -0
  104. data/lib/elastic_apm/sinatra.rb +17 -0
  105. data/lib/elastic_apm/span.rb +20 -2
  106. data/lib/elastic_apm/span/context.rb +43 -44
  107. data/lib/elastic_apm/span/context/db.rb +43 -0
  108. data/lib/elastic_apm/span/context/destination.rb +77 -0
  109. data/lib/elastic_apm/span/context/http.rb +43 -0
  110. data/lib/elastic_apm/span_helpers.rb +17 -0
  111. data/lib/elastic_apm/spies.rb +33 -14
  112. data/lib/elastic_apm/spies/action_dispatch.rb +17 -0
  113. data/lib/elastic_apm/spies/delayed_job.rb +17 -0
  114. data/lib/elastic_apm/spies/elasticsearch.rb +49 -5
  115. data/lib/elastic_apm/spies/faraday.rb +53 -18
  116. data/lib/elastic_apm/spies/http.rb +35 -3
  117. data/lib/elastic_apm/spies/json.rb +17 -0
  118. data/lib/elastic_apm/spies/mongo.rb +23 -1
  119. data/lib/elastic_apm/spies/net_http.rb +50 -8
  120. data/lib/elastic_apm/spies/rake.rb +17 -0
  121. data/lib/elastic_apm/spies/redis.rb +17 -0
  122. data/lib/elastic_apm/spies/resque.rb +53 -0
  123. data/lib/elastic_apm/spies/sequel.rb +52 -15
  124. data/lib/elastic_apm/spies/shoryuken.rb +65 -0
  125. data/lib/elastic_apm/spies/sidekiq.rb +17 -0
  126. data/lib/elastic_apm/spies/sinatra.rb +17 -0
  127. data/lib/elastic_apm/spies/sneakers.rb +74 -0
  128. data/lib/elastic_apm/spies/sucker_punch.rb +54 -0
  129. data/lib/elastic_apm/spies/tilt.rb +17 -0
  130. data/lib/elastic_apm/sql.rb +36 -0
  131. data/lib/elastic_apm/sql/signature.rb +169 -0
  132. data/lib/elastic_apm/sql/tokenizer.rb +264 -0
  133. data/lib/elastic_apm/sql/tokens.rb +63 -0
  134. data/lib/elastic_apm/sql_summarizer.rb +18 -2
  135. data/lib/elastic_apm/stacktrace.rb +17 -0
  136. data/lib/elastic_apm/stacktrace/frame.rb +17 -0
  137. data/lib/elastic_apm/stacktrace_builder.rb +23 -1
  138. data/lib/elastic_apm/subscriber.rb +17 -0
  139. data/lib/elastic_apm/trace_context.rb +85 -49
  140. data/lib/elastic_apm/trace_context/traceparent.rb +113 -0
  141. data/lib/elastic_apm/trace_context/tracestate.rb +43 -0
  142. data/lib/elastic_apm/transaction.rb +43 -15
  143. data/lib/elastic_apm/transport/base.rb +39 -23
  144. data/lib/elastic_apm/transport/connection.rb +17 -0
  145. data/lib/elastic_apm/transport/connection/http.rb +17 -0
  146. data/lib/elastic_apm/transport/connection/proxy_pipe.rb +24 -2
  147. data/lib/elastic_apm/transport/filters.rb +17 -0
  148. data/lib/elastic_apm/transport/filters/hash_sanitizer.rb +77 -0
  149. data/lib/elastic_apm/transport/filters/secrets_filter.rb +29 -53
  150. data/lib/elastic_apm/transport/headers.rb +21 -0
  151. data/lib/elastic_apm/transport/serializers.rb +17 -0
  152. data/lib/elastic_apm/transport/serializers/context_serializer.rb +17 -0
  153. data/lib/elastic_apm/transport/serializers/error_serializer.rb +17 -0
  154. data/lib/elastic_apm/transport/serializers/metadata_serializer.rb +44 -20
  155. data/lib/elastic_apm/transport/serializers/metricset_serializer.rb +17 -0
  156. data/lib/elastic_apm/transport/serializers/span_serializer.rb +45 -8
  157. data/lib/elastic_apm/transport/serializers/transaction_serializer.rb +17 -0
  158. data/lib/elastic_apm/transport/user_agent.rb +17 -0
  159. data/lib/elastic_apm/transport/worker.rb +17 -0
  160. data/lib/elastic_apm/util.rb +17 -0
  161. data/lib/elastic_apm/util/inflector.rb +17 -0
  162. data/lib/elastic_apm/util/lru_cache.rb +17 -0
  163. data/lib/elastic_apm/util/throttle.rb +17 -0
  164. data/lib/elastic_apm/version.rb +18 -1
  165. metadata +36 -7
  166. data/.ci/downstreamTests.groovy +0 -185
  167. data/.ci/prepare-git-context.sh +0 -24
  168. data/.github/workflows/main.yml +0 -14
  169. data/CHANGELOG.md +0 -1
@@ -1,3 +1,20 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
1
18
  # frozen_string_literal: true
2
19
 
3
20
  module ElasticAPM
@@ -1,3 +1,20 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
1
18
  # frozen_string_literal: true
2
19
 
3
20
  module ElasticAPM
@@ -0,0 +1,74 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # frozen_string_literal: true
19
+
20
module ElasticAPM
  # @api private
  module Spies
    # Installs a Sneakers middleware that wraps every handled message in an
    # ElasticAPM transaction.
    #
    # @api private
    class SneakersSpy
      include Logging

      # Whether the loaded sneakers gem is at least version 2.12.0.
      #
      # @return [Boolean] false when RubyGems reports no loaded spec for
      #   sneakers, instead of failing with NoMethodError on nil
      def self.supported_version?
        spec = Gem.loaded_specs['sneakers']
        return false unless spec

        spec.version >= Gem::Version.create('2.12.0')
      end

      # Registers the middleware with Sneakers, or logs a warning and does
      # nothing when the gem version is unsupported (or cannot be determined).
      def install
        unless SneakersSpy.supported_version?
          warn(
            'Sneakers version is below 2.12.0. Sneakers spy installation failed'
          )
          return
        end

        Sneakers.middleware.use(Middleware, nil)
      end

      # Middleware around the rest of the Sneakers middleware chain.
      #
      # @api private
      class Middleware
        def initialize(app, *args)
          @app = app
          @args = args
        end

        # Starts a transaction named after the consumer's queue, labels it
        # with the routing key, and delegates to the wrapped app.
        #
        # @return whatever the wrapped app returns
        # @raise re-raises anything raised downstream after reporting it
        def call(deserialized_msg, delivery_info, metadata, handler)
          transaction =
            ElasticAPM.start_transaction(
              delivery_info.consumer.queue.name,
              'Sneakers'
            )

          ElasticAPM.set_label(:routing_key, delivery_info.routing_key)

          res = @app.call(deserialized_msg, delivery_info, metadata, handler)
          transaction&.done(:success)

          res
        rescue ::Exception => e
          # Deliberately broad: the error is reported and then re-raised
          # untouched, so nothing is swallowed.
          ElasticAPM.report(e, handled: false)
          transaction&.done(:error)
          raise
        ensure
          ElasticAPM.end_transaction
        end
      end
    end

    register 'Sneakers', 'sneakers', SneakersSpy.new
  end
end
@@ -0,0 +1,54 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # frozen_string_literal: true
19
+
20
module ElasticAPM
  # @api private
  module Spies
    # Wraps SuckerPunch's internal `__run_perform` so that every job run is
    # recorded as an ElasticAPM transaction.
    #
    # @api private
    class SuckerPunchSpy
      TYPE = 'sucker_punch'

      def install
        ::SuckerPunch::Job::ClassMethods.class_eval do
          alias :__run_perform_without_elastic_apm :__run_perform

          def __run_perform(*args)
            # This method is reached via JobClass#async_perform
            # or JobClass#perform_in.
            name = to_s
            transaction = ElasticAPM.start_transaction(name, TYPE)
            result = __run_perform_without_elastic_apm(*args)
            # `transaction` is guarded with `&.` (as the Sneakers spy does)
            # so a nil transaction cannot turn a successful job into a
            # NoMethodError.
            transaction&.done 'success'
            # Hand back the wrapped method's own result so the wrapper stays
            # transparent to callers.
            result
          rescue ::Exception => e
            # Note that SuckerPunch by default doesn't raise the errors from
            # the user-defined JobClass#perform method as it uses an error
            # handler, accessed via `SuckerPunch.exception_handler`.
            ElasticAPM.report(e, handled: false)
            transaction&.done 'error'
            raise
          ensure
            ElasticAPM.end_transaction
          end
        end
      end
    end

    register 'SuckerPunch', 'sucker_punch', SuckerPunchSpy.new
  end
end
@@ -1,3 +1,20 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
1
18
  # frozen_string_literal: true
2
19
 
3
20
  module ElasticAPM
@@ -0,0 +1,36 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # frozen_string_literal: true
19
+
20
module ElasticAPM
  # @api private
  module Sql
    # Shortcut for picking one of the two SQL summarizer implementations the
    # agent ships with. The choice is made on first call, based on the
    # agent's `use_legacy_sql_parser` config, and memoized afterwards.
    #
    # @return the memoized summarizer instance
    def self.summarizer
      return @summarizer if @summarizer

      legacy = ElasticAPM.agent&.config&.use_legacy_sql_parser

      # The implementation file is only loaded once it is actually needed.
      if legacy
        require 'elastic_apm/sql_summarizer'
        @summarizer = SqlSummarizer.new
      else
        require 'elastic_apm/sql/signature'
        @summarizer = Sql::Signature::Summarizer.new
      end
    end
  end
end
@@ -0,0 +1,169 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # frozen_string_literal: true
19
+
20
+ require 'elastic_apm/sql/tokenizer'
21
+
22
+ module ElasticAPM
23
+ module Sql
24
+ # @api private
25
+ class Signature
26
+ include Tokens
27
+
28
+ # Mostly here to provide a similar API to new SqlSummarizer for easier
29
+ # swapping out
30
+ #
31
+ # @api private
32
class Summarizer
  # Builds the signature string for one SQL statement.
  #
  # @param sql [String] the raw SQL text
  # @return [String] the result of parsing `sql` with a fresh Signature
  def summarize(sql)
    Signature.new(sql).parse
  end
end
37
+
38
# @param sql [String] statement to summarize; kept both raw (for the
#   first-word fallback in #parse) and wrapped in a Tokenizer
def initialize(sql)
  @tokenizer = Tokenizer.new(sql)
  @sql = sql
end
42
+
43
# Produces the signature for the statement given to #initialize.
#
# Leading comments are skipped so that the first real token decides which
# statement form is parsed. When no form matches, the upcased first
# whitespace-separated word of the raw SQL is returned instead.
#
# @return [String] the signature, or '' for blank input
def parse
  # Advance to the first token that is not a comment.
  loop do
    break unless @tokenizer.scan
    break unless @tokenizer.token == COMMENT
  end

  signature = parse_tokens
  return signature if signature

  # If all else fails, just return the first token of the query.
  first_word = @sql.split.first
  first_word ? first_word.upcase : ''
end
55
+
56
# Convenience entry point: builds an instance for `sql` and parses it.
#
# @param sql [String]
# @return [String] see #parse
def self.parse(sql)
  signature = new(sql)
  signature.parse
end
59
+
60
+ private
61
+
62
+ # rubocop:disable Metrics/CyclomaticComplexity
63
+ # rubocop:disable Metrics/PerceivedComplexity
64
# Dispatches on the current (first) token of the statement.
#
# @return [String, nil] the signature, or nil when the statement form is
#   not recognised or its table name cannot be found
def parse_tokens
  case @tokenizer.token
  when CALL then parse_call
  when DELETE then parse_delete
  when INSERT, REPLACE then parse_insert_or_replace
  when SELECT then parse_select
  when UPDATE then parse_update
  end
end

# CALL <procedure>
def parse_call
  return unless scan_until IDENT

  "CALL #{@tokenizer.text}"
end

# DELETE ... FROM <table>
def parse_delete
  return unless scan_until FROM
  return unless scan_token IDENT

  "DELETE FROM #{scan_dotted_identifier}"
end

# INSERT|REPLACE ... INTO <table>; the action keeps the casing used in the
# statement.
def parse_insert_or_replace
  action = @tokenizer.text
  return unless scan_until INTO
  return unless scan_token IDENT

  "#{action} INTO #{scan_dotted_identifier}"
end

# SELECT ... FROM <table>, ignoring any FROM inside parentheses.
def parse_select
  depth = 0

  while @tokenizer.scan
    case @tokenizer.token
    when LPAREN then depth += 1
    when RPAREN then depth -= 1
    when FROM
      next unless depth.zero?
      return unless scan_token IDENT

      return "SELECT FROM #{scan_dotted_identifier}"
    end
  end

  nil
end

# UPDATE [option keywords] <table>
def parse_update
  # Scan for the table name. Some dialects allow option keywords before
  # the table name.
  return 'UPDATE' unless scan_token IDENT

  table = @tokenizer.text
  after_period = false
  seen_period = false

  while @tokenizer.scan
    case @tokenizer.token
    when IDENT
      if after_period
        table += @tokenizer.text
        after_period = false
      end

      # Before the first period every new identifier replaces the previous
      # one (it was presumably an option keyword). After the first period,
      # adjacent identifiers are ignored in case they are unknown keywords.
      table = @tokenizer.text unless seen_period
    when PERIOD
      after_period = true
      seen_period = true
      table += '.'
    else
      return "UPDATE #{table}"
    end
  end

  # Input ended right after the table name: still report the table rather
  # than falling through to nil.
  "UPDATE #{table}"
end
134
+ # rubocop:enable Metrics/CyclomaticComplexity
135
+ # rubocop:enable Metrics/PerceivedComplexity
136
+
137
+ # Scans until finding token of `kind`
138
# @param kind the token constant to look for
# @return [Boolean] true once a token equal to `kind` has been scanned
#   (it is then the tokenizer's current token); false when the input runs
#   out first
def scan_until(kind)
  while @tokenizer.scan
    return true if @tokenizer.token == kind
  end

  false
end
144
+
145
+ # Scans next token, ignoring comments
146
+ # Returns whether next token is of `kind`
147
# @param kind the token constant expected next
# @return [Boolean] whether the tokenizer's current token equals `kind`
#   after advancing past any comments; when the input is exhausted the
#   previously scanned token is the one compared
def scan_token(kind)
  loop do
    break unless @tokenizer.scan
    break unless @tokenizer.token == COMMENT
  end

  @tokenizer.token == kind
end
157
+
158
# Reads a possibly qualified name (`a`, `a.b`, `a.b.c`, ...) starting at
# the tokenizer's current identifier.
#
# @return [String] the identifier parts joined with periods
def scan_dotted_identifier
  segments = [@tokenizer.text]
  segments << @tokenizer.text while scan_token(PERIOD) && scan_token(IDENT)
  segments.join('.')
end
167
+ end
168
+ end
169
+ end
@@ -0,0 +1,264 @@
1
+ # Licensed to Elasticsearch B.V. under one or more contributor
2
+ # license agreements. See the NOTICE file distributed with
3
+ # this work for additional information regarding copyright
4
+ # ownership. Elasticsearch B.V. licenses this file to you under
5
+ # the Apache License, Version 2.0 (the "License"); you may
6
+ # not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ # frozen_string_literal: true
19
+
20
+ require 'strscan'
21
+ require 'elastic_apm/sql/tokens'
22
+
23
+ module ElasticAPM
24
+ module Sql
25
+ # @api private
26
+ class Tokenizer
27
+ include Tokens
28
+
29
+ ALPHA = /[[:alpha:]]/.freeze
30
+ DIGIT = /[[:digit:]]/.freeze
31
+ SPACE = /[[:space:]]+/.freeze
32
+
33
# @param input [String] the SQL text to tokenize
def initialize(input)
  @input = input

  @scanner = StringScanner.new(input)
  # Byte range of the current token. Both ends start at 0 so that #text
  # returns an empty string, rather than raising on nil, when it is
  # called before the first #scan.
  @byte_start = 0
  @byte_end = 0
end
39
+
40
+ attr_reader :input, :scanner, :token
41
+
42
# @return [String] the slice of the input covered by the current token's
#   byte range (`@byte_start` up to, not including, `@byte_end`)
def text
  byte_length = @byte_end - @byte_start
  @input.byteslice(@byte_start, byte_length)
end
45
+
46
# Advances to the next token: skips whitespace, records where the token
# starts, and stores the token kind in `@token`.
#
# @return [Boolean] false when no character is left (`@token` is then
#   left unchanged), true otherwise
def scan
  scanner.skip(SPACE)
  @byte_start = scanner.pos

  first = next_char
  return false if first.nil?

  @token = next_token(first)
  true
end
58
+
59
+ private
60
+
61
+ # rubocop:disable Metrics/CyclomaticComplexity
62
# Chooses how to lex the token that begins with `char`, which has already
# been consumed from the scanner.
#
# @param char [String] the token's first character
# @return the token kind, either directly or from the matching `scan_*`
#   helper
def next_token(char)
  case char
  when '_' then scan_keyword_or_identifier(possible_keyword: false)
  when ALPHA then scan_keyword_or_identifier(possible_keyword: true)
  when DIGIT then scan_numeric_literal
  # Backtick and double quote close with the same character they open with.
  when '`', '"' then scan_quoted_indentifier(char)
  when '[' then scan_quoted_indentifier(']')
  when "'" then scan_string_literal
  when '$' then scan_dollar_sign
  when '/' then scan_bracketed_comment
  when '-' then scan_simple_comment
  when '.' then PERIOD
  when '(' then LPAREN
  when ')' then RPAREN
  else OTHER
  end
end
80
+ # rubocop:enable Metrics/CyclomaticComplexity
81
+
82
# Consumes one character and moves the current token's end to the
# scanner's new position.
#
# @return [String, nil] the character, or nil at end of input
def next_char
  @scanner.getch.tap { @byte_end = @scanner.pos }
end
87
+
88
+ # StringScanner#peek returns next byte which could be an incomplete utf
89
+ # multi-byte character
90
# Looks at the next character without consuming it, widening the peek one
# byte at a time until the bytes form a validly encoded string.
#
# @param length [Integer] number of bytes to start peeking with
# @return [String, nil] the next character, or nil at end of input or
#   when no valid character is found within 4 bytes
def peek_char(length = 1)
  # The maximum byte count of utf chars is 4:
  # > In UTF-8, characters from the U+0000..U+10FFFF range (the UTF-16
  #   accessible range) are encoded using sequences of 1 to 4 octets.
  # # https://tools.ietf.org/html/rfc3629
  length.upto(4) do |size|
    candidate = @scanner.peek(size)

    return nil if candidate.empty?
    return candidate if candidate.valid_encoding?
  end

  nil
end
104
+
105
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
106
# Consumes the rest of a word and classifies it.
#
# @param possible_keyword [Boolean] whether the word may still be a
#   keyword; cleared as soon as an underscore, dollar sign or digit is seen
# @return the matching entry from KEYWORDS, or IDENT
def scan_keyword_or_identifier(possible_keyword:)
  while (peek = peek_char)
    if peek == '_' || peek == '$' || peek =~ DIGIT
      possible_keyword = false
    elsif peek !~ ALPHA
      break
    end

    next_char
  end

  return IDENT unless possible_keyword

  snap = text
  length = snap.length
  return IDENT if length < KEYWORD_MIN_LENGTH || length > KEYWORD_MAX_LENGTH

  # Upcase once instead of once per candidate, and tolerate a length for
  # which KEYWORDS has no entry.
  upcased = snap.upcase
  candidates = KEYWORDS[length] || []

  candidates.find { |kw| kw.to_s == upcased } || IDENT
end
131
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
132
+
133
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
134
# Lexes a token that begins with `$`.
#
# @return STRING for a dollar-quoted literal (`$tag$...$tag$`) whose
#   closing tag is found, OTHER otherwise (for example `$1`)
def scan_dollar_sign
  while (peek = peek_char)
    case peek
    when DIGIT
      next_char while peek_char =~ DIGIT
    when '$', '_', ALPHA, SPACE
      # PostgreSQL supports dollar-quoted string literal syntax,
      # like $foo$...$foo$. The tag (foo in this case) is optional,
      # and if present follows identifier rules.
      while (char = next_char)
        case char
        when '$'
          # This marks the end of the initial $foo$.
          tag = text
          # `scanner.pos` is a byte offset, so take the remainder by bytes
          # and convert the character index of the closing tag back into a
          # byte count. Mixing the two units misplaces the token end
          # whenever multibyte characters are involved.
          rest = input.byteslice(scanner.pos, input.bytesize)
          char_index = rest.index(tag)
          next unless char_index

          delta = rest[0, char_index].bytesize + tag.bytesize
          @byte_end += delta
          scanner.pos += delta
          return STRING
        when SPACE
          # Unknown token starting with $, consume chars until space.
          @byte_end -= char.bytesize
          return OTHER
        end
      end
    else break
    end
  end

  OTHER
end
168
+ # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
169
+
170
# Consumes a quoted identifier up to its closing delimiter and narrows the
# token's byte range so that #text excludes the quotes.
#
# @param delimiter [String] the closing quote character. A doubled `"`
#   inside a `"`-quoted identifier is treated as an escaped quote.
# @return IDENT, also when the input ends before the closing delimiter
def scan_quoted_indentifier(delimiter)
  terminated = false

  while (char = next_char)
    next unless char == delimiter

    if delimiter == '"' && peek_char == delimiter
      next_char
      next
    end

    terminated = true
    break
  end

  # Remove quotes from identifier. When the input ran out before the
  # closing quote, `char` is nil and there is no trailing quote to strip,
  # so only the opening one is dropped (previously this raised
  # NoMethodError on nil).
  @byte_start += delimiter.bytesize
  @byte_end -= delimiter.bytesize if terminated

  IDENT
end
187
+
188
+ # rubocop:disable Metrics/CyclomaticComplexity
189
# Lexes a `/* ... */` comment, honouring nested `/* */` pairs. The leading
# `/` has already been consumed.
#
# @return COMMENT when the comment is closed; OTHER when `/` is not
#   followed by `*`, or when the input ends before the comment is closed
#   (instead of leaving the token as nil)
def scan_bracketed_comment
  return OTHER unless peek_char == '*'

  nesting = 1

  while (char = next_char)
    case char
    when '/'
      next unless peek_char == '*'

      next_char
      nesting += 1
    when '*'
      next unless peek_char == '/'

      next_char
      nesting -= 1
      return COMMENT if nesting == 0
    end
  end

  OTHER
end
208
+ # rubocop:enable Metrics/CyclomaticComplexity
209
+
210
# Lexes a `-- ...` comment. The first `-` has already been consumed.
#
# @return OTHER when the `-` is not followed by another `-`; otherwise
#   COMMENT, after consuming through the next newline or to end of input
def scan_simple_comment
  return OTHER unless peek_char == '-'

  loop do
    consumed = next_char
    break if consumed.nil? || consumed == "\n"
  end

  COMMENT
end
219
+
220
# Lexes a single-quoted string literal. The opening quote has already been
# consumed. Backslash escapes and doubled quotes (`''`) do not end the
# literal.
#
# @return STRING when the closing quote is found; OTHER when the input
#   ends first (instead of leaving the token as nil)
def scan_string_literal
  delimiter = "'"

  while (char = next_char)
    if char == '\\'
      # Skip escaped character, e.g. 'what\'s up?'
      next_char
      next
    end

    next unless char == delimiter

    # A quote not followed by another quote (including at end of input)
    # closes the literal.
    return STRING unless peek_char == delimiter

    next_char
  end

  OTHER
end
238
+
239
+ # rubocop:disable Metrics/CyclomaticComplexity
240
# Lexes a numeric literal whose first digit has already been consumed.
# Accepts at most one decimal point and at most one exponent marker
# (optionally followed by a sign); a second occurrence of either ends the
# literal.
#
# @return NUMBER
def scan_numeric_literal
  period = false
  exponent = false

  while (peek = peek_char)
    case peek
    when DIGIT then next_char
    when '.'
      return NUMBER if period

      next_char
      period = true
    when 'e', 'E'
      return NUMBER if exponent

      next_char
      # Record the exponent; without this the guard above never fires and
      # input such as `1e5e3` is lexed as a single number.
      exponent = true
      next_char if peek_char =~ /[+-]/
    else break
    end
  end

  NUMBER
end
261
+ # rubocop:enable Metrics/CyclomaticComplexity
262
+ end
263
+ end
264
+ end