splunk-sdk-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. data/CHANGELOG.md +160 -0
  2. data/Gemfile +8 -0
  3. data/LICENSE +177 -0
  4. data/README.md +310 -0
  5. data/Rakefile +40 -0
  6. data/examples/1_connect.rb +51 -0
  7. data/examples/2_manage.rb +103 -0
  8. data/examples/3_blocking_searches.rb +82 -0
  9. data/examples/4_asynchronous_searches.rb +79 -0
  10. data/examples/5_stream_data_to_splunk.rb +79 -0
  11. data/lib/splunk-sdk-ruby.rb +47 -0
  12. data/lib/splunk-sdk-ruby/ambiguous_entity_reference.rb +28 -0
  13. data/lib/splunk-sdk-ruby/atomfeed.rb +323 -0
  14. data/lib/splunk-sdk-ruby/collection.rb +417 -0
  15. data/lib/splunk-sdk-ruby/collection/apps.rb +35 -0
  16. data/lib/splunk-sdk-ruby/collection/case_insensitive_collection.rb +58 -0
  17. data/lib/splunk-sdk-ruby/collection/configuration_file.rb +50 -0
  18. data/lib/splunk-sdk-ruby/collection/configurations.rb +80 -0
  19. data/lib/splunk-sdk-ruby/collection/jobs.rb +136 -0
  20. data/lib/splunk-sdk-ruby/collection/messages.rb +51 -0
  21. data/lib/splunk-sdk-ruby/context.rb +522 -0
  22. data/lib/splunk-sdk-ruby/entity.rb +260 -0
  23. data/lib/splunk-sdk-ruby/entity/index.rb +191 -0
  24. data/lib/splunk-sdk-ruby/entity/job.rb +339 -0
  25. data/lib/splunk-sdk-ruby/entity/message.rb +36 -0
  26. data/lib/splunk-sdk-ruby/entity/saved_search.rb +71 -0
  27. data/lib/splunk-sdk-ruby/entity/stanza.rb +45 -0
  28. data/lib/splunk-sdk-ruby/entity_not_ready.rb +26 -0
  29. data/lib/splunk-sdk-ruby/illegal_operation.rb +27 -0
  30. data/lib/splunk-sdk-ruby/namespace.rb +239 -0
  31. data/lib/splunk-sdk-ruby/resultsreader.rb +716 -0
  32. data/lib/splunk-sdk-ruby/service.rb +339 -0
  33. data/lib/splunk-sdk-ruby/splunk_http_error.rb +49 -0
  34. data/lib/splunk-sdk-ruby/synonyms.rb +50 -0
  35. data/lib/splunk-sdk-ruby/version.rb +27 -0
  36. data/lib/splunk-sdk-ruby/xml_shim.rb +117 -0
  37. data/splunk-sdk-ruby.gemspec +27 -0
  38. data/test/atom_test_data.rb +472 -0
  39. data/test/data/atom/atom_feed_with_message.xml +19 -0
  40. data/test/data/atom/atom_with_feed.xml +99 -0
  41. data/test/data/atom/atom_with_several_entries.xml +101 -0
  42. data/test/data/atom/atom_with_simple_entries.xml +30 -0
  43. data/test/data/atom/atom_without_feed.xml +248 -0
  44. data/test/data/export/4.2.5/export_results.xml +88 -0
  45. data/test/data/export/4.3.5/export_results.xml +87 -0
  46. data/test/data/export/5.0.1/export_results.xml +78 -0
  47. data/test/data/export/5.0.1/nonreporting.xml +232 -0
  48. data/test/data/results/4.2.5/results-empty.xml +0 -0
  49. data/test/data/results/4.2.5/results-preview.xml +255 -0
  50. data/test/data/results/4.2.5/results.xml +336 -0
  51. data/test/data/results/4.3.5/results-empty.xml +0 -0
  52. data/test/data/results/4.3.5/results-preview.xml +1057 -0
  53. data/test/data/results/4.3.5/results.xml +626 -0
  54. data/test/data/results/5.0.2/results-empty.xml +1 -0
  55. data/test/data/results/5.0.2/results-empty_preview.xml +1 -0
  56. data/test/data/results/5.0.2/results-preview.xml +448 -0
  57. data/test/data/results/5.0.2/results.xml +501 -0
  58. data/test/export_test_data.json +360 -0
  59. data/test/resultsreader_test_data.json +1119 -0
  60. data/test/services.server.info.xml +43 -0
  61. data/test/services.xml +111 -0
  62. data/test/test_atomfeed.rb +71 -0
  63. data/test/test_collection.rb +278 -0
  64. data/test/test_configuration_file.rb +124 -0
  65. data/test/test_context.rb +119 -0
  66. data/test/test_entity.rb +95 -0
  67. data/test/test_helper.rb +250 -0
  68. data/test/test_http_error.rb +52 -0
  69. data/test/test_index.rb +91 -0
  70. data/test/test_jobs.rb +319 -0
  71. data/test/test_messages.rb +17 -0
  72. data/test/test_namespace.rb +188 -0
  73. data/test/test_restarts.rb +49 -0
  74. data/test/test_resultsreader.rb +106 -0
  75. data/test/test_roles.rb +41 -0
  76. data/test/test_saved_searches.rb +119 -0
  77. data/test/test_service.rb +65 -0
  78. data/test/test_users.rb +33 -0
  79. data/test/test_xml_shim.rb +28 -0
  80. data/test/testfile.txt +1 -0
  81. metadata +200 -0
data/lib/splunk-sdk-ruby/entity_not_ready.rb
@@ -0,0 +1,26 @@
+ #--
+ # Copyright 2011-2013 Splunk, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"): you may
+ # not use this file except in compliance with the License. You may obtain
+ # a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ # License for the specific language governing permissions and limitations
+ # under the License.
+ #++
+
+ module Splunk
+   ##
+   # Exception thrown when fetching from an entity returns HTTP code 204.
+   #
+   # This primarily comes up with jobs. When a job is not yet ready, fetching
+   # it from the server returns code 204, and we want to handle it specially.
+   #
+   class EntityNotReady < StandardError
+   end
+ end
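For orientation, here is a minimal sketch of how +EntityNotReady+ typically surfaces: polling a freshly created search job until the server stops answering 204. It assumes, per the comment above, that fetching a not-yet-ready job raises this exception; the method names follow the SDK's own examples, and the polling interval is an arbitrary choice.

    require 'splunk-sdk-ruby'

    service = Splunk::connect(:username => "admin", :password => "changeme")
    job = service.jobs.create("search index=_internal | head 10")

    begin
      # Fetching the job raises Splunk::EntityNotReady for as long as the
      # server still answers 204 for it.
      job.refresh()
    rescue Splunk::EntityNotReady
      sleep(0.1) # Arbitrary polling interval.
      retry
    end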
data/lib/splunk-sdk-ruby/illegal_operation.rb
@@ -0,0 +1,27 @@
+ #--
+ # Copyright 2011-2013 Splunk, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"): you may
+ # not use this file except in compliance with the License. You may obtain
+ # a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ # License for the specific language governing permissions and limitations
+ # under the License.
+ #++
+
+ module Splunk
+   ##
+   # Exception thrown when a call is known statically to fail.
+   #
+   # +IllegalOperation+ is meant to be thrown when a call can be statically
+   # inferred to fail, such as trying to delete an index on versions of Splunk
+   # before 5.0. It implies that no round trips to the server were made.
+   #
+   class IllegalOperation < StandardError
+   end
+ end
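A hypothetical sketch of the intended usage pattern: since the exception is raised client side, before any HTTP request is made, rescuing it is cheap. The index name here is made up, and +indexes.delete+ follows the collection API defined in +collection.rb+.

    begin
      service.indexes.delete("scratch_index")
    rescue Splunk::IllegalOperation => err
      # Raised without any round trip to the server; for example, Splunk
      # versions before 5.0 cannot delete indexes via the REST API.
      puts "Skipping delete: #{err.message}"
    end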
data/lib/splunk-sdk-ruby/namespace.rb
@@ -0,0 +1,239 @@
+ #--
+ # Copyright 2011-2013 Splunk, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"): you may
+ # not use this file except in compliance with the License. You may obtain
+ # a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ # License for the specific language governing permissions and limitations
+ # under the License.
+ #++
+
+ ##
+ # Ruby representations of Splunk namespaces.
+ #
+ # Splunk's namespaces give access paths to objects. Each application, user,
+ # search job, saved search, or other entity in Splunk has a namespace, and
+ # when you access an entity via the REST API, you include a namespace in your
+ # query. Which entities are visible to your query depends on the namespace
+ # you use for the query.
+ #
+ # Some namespaces can contain wildcards or default values filled in by
+ # Splunk. We call such namespaces _wildcard_, since they can only be the
+ # namespace of a query, never of an entity. Namespaces that can be the
+ # namespace of an entity are called _exact_.
+ #
+ # We distinguish six kinds of namespace, each represented by a separate
+ # class:
+ #
+ # * +DefaultNamespace+, used for queries where you want whatever would be
+ #   the default for the user you are logged into Splunk as.
+ # * +GlobalNamespace+, which makes an entity visible anywhere in Splunk.
+ # * +SystemNamespace+, used for entities, like users and roles, that are
+ #   part of Splunk itself. Entities in the system namespace are visible
+ #   anywhere in Splunk.
+ # * +AppNamespace+, one per application installed in the Splunk instance.
+ # * +AppReferenceNamespace+, the namespace that applications themselves live
+ #   in (applications determine namespaces, so they need a special one). It
+ #   differs from +DefaultNamespace+ only in that it is an exact namespace.
+ # * +UserNamespace+, the user namespaces, defined by a user _and_ an
+ #   application.
+ #
+ # In the user and application namespaces, you can use +"-"+ as a wildcard
+ # in place of an actual user or application name.
+ #
+ # Each of these classes mixes in the empty module +Namespace+, so an
+ # instance of any of them responds to +#is_a?(Namespace)+ with +true+.
+ #
+ # Some of these classes are singletons and some are not. To avoid confusion,
+ # or having to remember which is which, create namespaces with the
+ # +namespace+ function rather than instantiating the classes directly.
+ #
+ # Which namespace the +eai:acl+ fields of an entity map to is determined by
+ # what the path to that entity should be. In the end, a namespace is a way
+ # to calculate the initial path to access an entity. For example,
+ # applications all have +sharing="app"+ and +app=""+ in their +eai:acl+
+ # fields, but their path uses the +services/+ prefix, so that particular
+ # combination, despite what it appears to be, is actually an
+ # +AppReferenceNamespace+.
+ #
+
+ require 'singleton'
+
+ module Splunk
+   ##
+   # Convert a hash of +eai:acl+ fields from Splunk's REST API into a
+   # namespace.
+   #
+   # _eai_acl_ should be a hash containing at least the key +"sharing"+ and,
+   # depending on the value associated with +"sharing"+, possibly the keys
+   # +"app"+ and +"owner"+.
+   #
+   # Returns: a +Namespace+.
+   #
+   def self.eai_acl_to_namespace(eai_acl)
+     namespace(:sharing => eai_acl["sharing"],
+               :app => eai_acl["app"],
+               :owner => eai_acl["owner"])
+   end
+
+   ##
+   # Create a +Namespace+.
+   #
+   # +namespace+ takes a hash of arguments, recognizing the keys +:sharing+,
+   # +:owner+, and +:app+. Among them, +:sharing+ is required, and depending
+   # on its value, the others may be required or not.
+   #
+   # +:sharing+ determines what kind of namespace is produced. It can have the
+   # values +"default"+, +"global"+, +"system"+, +"user"+, or +"app"+.
+   #
+   # If +:sharing+ is +"default"+, +"global"+, or +"system"+, the other two
+   # arguments are ignored. If +:sharing+ is +"app"+, only +:app+ is used,
+   # specifying the application of the namespace. If +:sharing+ is +"user"+,
+   # then both the +:app+ and +:owner+ arguments are used.
+   #
+   # If +:sharing+ is +"app"+ but +:app+ is +""+, +namespace+ returns an
+   # +AppReferenceNamespace+.
+   #
+   # Returns: a +Namespace+.
+   #
+   def self.namespace(args)
+     sharing = args.fetch(:sharing, "default")
+     owner = args.fetch(:owner, nil)
+     app = args.fetch(:app, nil)
+
+     if sharing == "system"
+       return SystemNamespace.instance
+     elsif sharing == "global"
+       return GlobalNamespace.instance
+     elsif sharing == "user"
+       if owner.nil? or owner == ""
+         raise ArgumentError.new("Must provide an owner for user namespaces.")
+       elsif app.nil? or app == ""
+         raise ArgumentError.new("Must provide an app for user namespaces.")
+       else
+         return UserNamespace.new(owner, app)
+       end
+     elsif sharing == "app"
+       if app.nil?
+         raise ArgumentError.new("Must specify an application for " +
+                                 "application sharing.")
+       elsif app == ""
+         return AppReferenceNamespace.instance
+       else
+         return AppNamespace.new(app)
+       end
+     elsif sharing == "default"
+       return DefaultNamespace.instance
+     else
+       raise ArgumentError.new("Unknown sharing value: #{sharing}")
+     end
+   end
+
+   ##
+   # A mixin that fills the role of an abstract base class.
+   #
+   # Namespaces have two methods, +is_exact?+ and +to_path_fragment+, and
+   # can be compared for equality.
+   #
+   module Namespace
+     ##
+     # Is this an exact namespace?
+     #
+     # Returns: +true+ or +false+.
+     #
+     def is_exact?() end
+
+     ##
+     # Returns the URL prefix corresponding to this namespace.
+     #
+     # The prefix is returned as a list of strings. The strings are _not_
+     # URL encoded. You need to URL encode them when you construct your URL.
+     #
+     # Returns: an +Array+ of +Strings+.
+     #
+     def to_path_fragment() end
+   end
+
+   class GlobalNamespace # :nodoc:
+     include Singleton
+     include Namespace
+     def is_exact?() true end
+     def to_path_fragment() ["servicesNS", "nobody", "system"] end
+   end
+
+   class SystemNamespace # :nodoc:
+     include Singleton
+     include Namespace
+     def is_exact?() true end
+     def to_path_fragment() ["servicesNS", "nobody", "system"] end
+   end
+
+   class DefaultNamespace # :nodoc:
+     include Singleton
+     include Namespace
+     # A services/ namespace always uses the current user and current app,
+     # neither of which is a wildcard, so this namespace is guaranteed to
+     # be exact.
+     def is_exact?() true end
+     def to_path_fragment() ["services"] end
+   end
+
+   class AppReferenceNamespace # :nodoc:
+     include Singleton
+     include Namespace
+     def is_exact?() true end
+     def to_path_fragment() ["services"] end
+   end
+
+   class AppNamespace # :nodoc:
+     include Namespace
+     attr_reader :app
+
+     def initialize(app)
+       @app = app
+     end
+
+     def ==(other)
+       other.is_a?(AppNamespace) && @app == other.app
+     end
+
+     def is_exact?()
+       @app != "-"
+     end
+
+     def to_path_fragment()
+       ["servicesNS", "nobody", @app]
+     end
+   end
+
+   class UserNamespace # :nodoc:
+     include Namespace
+     attr_reader :user, :app
+
+     def initialize(user, app)
+       @user = user
+       @app = app
+     end
+
+     def ==(other)
+       other.is_a?(UserNamespace) && @app == other.app &&
+         @user == other.user
+     end
+
+     def is_exact?()
+       (@app != "-") && (@user != "-")
+     end
+
+     def to_path_fragment()
+       ["servicesNS", @user, @app]
+     end
+   end
+ end
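To make the mapping from arguments to paths concrete, here is a short sketch using the functions above. The return values follow directly from the code; the +Splunk::+ qualification is needed because these are module functions.

    require 'splunk-sdk-ruby'

    ns = Splunk::namespace(:sharing => "user",
                           :owner => "admin", :app => "search")
    ns.is_exact?         # => true
    ns.to_path_fragment  # => ["servicesNS", "admin", "search"]

    wild = Splunk::namespace(:sharing => "app", :app => "-")
    wild.is_exact?       # => false ("-" is a wildcard)

    # eai:acl fields from a REST response round-trip back to a namespace.
    # sharing="app" with app="" is the special application-reference case:
    ns = Splunk::eai_acl_to_namespace({"sharing" => "app", "app" => ""})
    ns.to_path_fragment  # => ["services"]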
data/lib/splunk-sdk-ruby/resultsreader.rb
@@ -0,0 +1,716 @@
+ #--
+ # Copyright 2011-2013 Splunk, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"): you may
+ # not use this file except in compliance with the License. You may obtain
+ # a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ # License for the specific language governing permissions and limitations
+ # under the License.
+ #++
+
+ ##
+ # +resultsreader.rb+ provides classes to incrementally parse the XML output
+ # from Splunk search jobs. For most search jobs you will want +ResultsReader+,
+ # which handles a single results set. However, running a blocking export job
+ # against the +search/jobs/export+ endpoint sends back a stream of results
+ # sets, all but the last of which are previews. In that case, you should use
+ # +MultiResultsReader+, which lets you iterate over the results sets.
+ #
+ # By default, +ResultsReader+ will try to use Nokogiri for XML parsing. If
+ # Nokogiri isn't available, it will fall back to REXML, which ships with Ruby
+ # 1.9. See +xml_shim.rb+ for how to alter this behavior.
+ #
+
+ #--
+ # There are two basic designs we could have used for handling the
+ # search/jobs/export output. We could either have the user call
+ # +ResultsReader#each+ multiple times, each time going through the next
+ # results set, or we could do what we have here and have an outer iterator
+ # that yields distinct +ResultsReader+ objects for each results set.
+ #
+ # The outer iterator is syntactically somewhat clearer, but you must
+ # invalidate the previous +ResultsReader+ objects before yielding a new one,
+ # so that code like
+ #
+ #     readers = []
+ #     outer_iter.each do |reader|
+ #       readers << reader
+ #     end
+ #     readers[2].each do |result|
+ #       puts result
+ #     end
+ #
+ # will throw an error on the second +each+. The right behavior is to throw
+ # an exception in the +ResultsReader+'s +each+ if it is invoked out of
+ # order. This problem doesn't affect the all-in-one design.
+ #
+ # However, in the all-in-one design, it is impossible to set the
+ # +is_preview+ and +fields+ instance variables of the +ResultsReader+
+ # correctly between invocations of +each+. Consider code such as
+ #
+ #     while reader.is_preview
+ #       reader.each do |result|
+ #         ...
+ #       end
+ #     end
+ #
+ # If the +...+ contains a +break+, then there is no way to set +is_preview+
+ # correctly before the next iteration of the +while+ loop. This problem does
+ # not affect the outer iterator design, and Fred Ross and Yunxin Wu were not
+ # able to come up with a way to make it work in the all-in-one design, so the
+ # SDK uses the outer iterator design.
+ #++
+
+ require 'stringio'
71
+
72
+ require_relative 'xml_shim'
73
+ require_relative 'collection/jobs' # To access ExportStream
74
+
75
+ module Splunk
76
+ # +ResultsReader+ parses Splunk's XML format for results into Ruby objects.
77
+ #
78
+ # You can use both Nokogiri and REXML. By default, the +ResultsReader+ will
79
+ # try to use Nokogiri, and if it is not available will fall back to REXML. If
80
+ # you want other behavior, see +xml_shim.rb+ for how to set the XML library.
81
+ #
82
+ # +ResultsReader is an +Enumerable+, so it has methods such as +each+ and
83
+ # +each_with_index+. However, since it's a stream parser, once you iterate
84
+ # through it once, it will thereafter be empty.
85
+ #
86
+ # Do not use +ResultsReader+ with the results of the +create_export+ or
87
+ # +create_stream+ methods on +Service+ or +Jobs+. These methods use endpoints
88
+ # which return a different set of data structures. Use +MultiResultsReader+
89
+ # instead for those cases. If you do use +ResultsReader+, it will return
90
+ # a concatenation of all non-preview events in the stream, but that behavior
91
+ # should be considered deprecated, and will result in a warning.
92
+ #
93
+ # The ResultsReader object has two additional methods:
94
+ #
95
+ # * +is_preview?+ returns a Boolean value that indicates whether these
96
+ # results are a preview from an unfinished search or not
97
+ # * +fields+ returns an array of all the fields that may appear in a result
98
+ # in this set, in the order they should be displayed (if you're going
99
+ # to make a table or the like)
100
+ #
101
+ # *Example*:
102
+ #
103
+ # require 'splunk-sdk-ruby'
104
+ #
105
+ # service = Splunk::connect(:username => "admin", :password => "changeme")
106
+ #
107
+ # stream = service.jobs.create_oneshot("search index=_internal | head 10")
108
+ # reader = ResultsReader.new(stream)
109
+ # puts reader.is_preview?
110
+ # # Prints: false
111
+ # reader.each do |result|
112
+ # puts result
113
+ # end
114
+ # # Prints a sequence of Hashes containing events.
115
+ #
+   class ResultsReader
+     include Enumerable
+
+     ##
+     # Are the results in this reader a preview from an unfinished search?
+     #
+     # Returns: +true+ or +false+, or +nil+ if the stream is empty.
+     #
+     def is_preview?
+       return @is_preview
+     end
+
+     ##
+     # An +Array+ of all the fields that may appear in each result.
+     #
+     # Note that any given result will contain a subset of these fields.
+     #
+     # Returns: an +Array+ of +Strings+.
+     #
+     attr_reader :fields
+
+     def initialize(text_or_stream)
+       if text_or_stream.nil?
+         stream = StringIO.new("")
+       elsif text_or_stream.is_a?(ExportStream)
+         # The sensible behavior on streams from the export endpoints is to
+         # skip all preview results and concatenate all others. The export
+         # functions wrap their streams in ExportStream to mark that they
+         # need this special handling.
+         @is_export = true
+         @reader = MultiResultsReader.new(text_or_stream).final_results()
+         @is_preview = @reader.is_preview?
+         @fields = @reader.fields
+         return
+       elsif !text_or_stream.respond_to?(:read)
+         # Strip because the XML libraries can be pissy.
+         stream = StringIO.new(text_or_stream.strip)
+       else
+         stream = text_or_stream
+       end
+
+       if stream.eof?
+         @is_preview = nil
+         @fields = []
+       else
+         # We use a SAX parser. +listener+ is the event handler, but a SAX
+         # parser won't usually transfer control during parsing.
+         # To incrementally return results as we parse, we have to put
+         # the parser into a +Fiber+ from which we can yield.
+         listener = ResultsListener.new()
+         @iteration_fiber = Fiber.new do
+           if $splunk_xml_library == :nokogiri
+             parser = Nokogiri::XML::SAX::Parser.new(listener)
+             parser.parse(stream)
+           else # Use REXML
+             REXML::Document.parse_stream(stream, listener)
+           end
+         end
+
+         @is_preview = @iteration_fiber.resume
+         @fields = @iteration_fiber.resume
+         @reached_end = false
+       end
+     end
+
+     def each()
+       # If we were passed a stream from an export endpoint, it is marked as
+       # such, and we handle it differently.
+       if @is_export
+         warn "[DEPRECATED] Do not use ResultsReader on the output of the " +
+              "export endpoint. Use MultiResultsReader instead."
+         enum = @reader.each()
+       else
+         enum = Enumerator.new() do |yielder|
+           if !@iteration_fiber.nil? # Handle the case of empty files
+             @reached_end = false
+             while true
+               result = @iteration_fiber.resume
+               break if result.nil? or result == :end_of_results_set
+               yielder << result
+             end
+           end
+           @reached_end = true
+         end
+       end
+
+       if block_given? # Apply the enumerator to a block if we have one
+         enum.each() { |e| yield e }
+       else
+         enum # Otherwise return the enumerator itself
+       end
+     end
+
+     ##
+     # Skips the rest of the events in this ResultsReader.
+     #
+     def skip_remaining_results()
+       if !@reached_end
+         each() { |result| }
+       end
+     end
+   end
+
+   ##
+   # +ResultsListener+ is the SAX event handler for +ResultsReader+.
+   #
+   # The authors of Nokogiri decided to make their SAX interface slightly
+   # incompatible with that of REXML. For example, REXML uses +tag_start+ and
+   # passes attributes as a dictionary, while Nokogiri calls the same thing
+   # +start_element+, and passes attributes as an association list.
+   #
+   # This is a classic finite state machine parser. The +@states+ variable
+   # contains a hash mapping each state name to a hash of functions giving
+   # the behavior of the state machine in that state. The handler methods on
+   # the class dispatch to these functions based upon the current state (as
+   # stored in +@state+).
+   #
+   # The parser initially runs until it has determined whether the results
+   # are a preview, then calls +Fiber.yield+ to return it. Then it continues
+   # and tries to yield a field order, and then any results. (It will always
+   # yield a field order, even if it is empty.) At the end of a results set,
+   # it yields +:end_of_results_set+.
+   #
+   class ResultsListener # :nodoc:
+     def initialize()
+       # @fields holds the accumulated list of fields from the fieldOrder
+       # element. If there has been no accumulation, it is set to
+       # :no_fieldOrder_found. For empty results sets, there is often no
+       # fieldOrder element, but we still want to yield an empty Array at the
+       # right point, so if we reach the end of a results element and @fields
+       # is still :no_fieldOrder_found, we yield an empty array at that
+       # point.
+       @fields = :no_fieldOrder_found
+       @concatenate = false
+       @is_preview = nil
+       @state = :base
+       @states = {
+         # Toplevel state.
+         :base => {
+           :start_element => lambda do |name, attributes|
+             if name == "results"
+               if !@concatenate
+                 @is_preview = attributes["preview"] == "1"
+                 Fiber.yield(@is_preview)
+               end
+             elsif name == "fieldOrder"
+               if !@concatenate
+                 @state = :field_order
+                 @fields = []
+               end
+             elsif name == "result"
+               @state = :result
+               @current_offset = Integer(attributes["offset"])
+               @current_result = {}
+             end
+           end,
+           :end_element => lambda do |name|
+             if name == "results" and !@concatenate
+               Fiber.yield([]) if @fields == :no_fieldOrder_found
+
+               if !@is_preview # Start concatenating events
+                 @concatenate = true
+               else
+                 # Reset the fieldOrder
+                 @fields = :no_fieldOrder_found
+                 Fiber.yield(:end_of_results_set)
+               end
+             end
+           end
+         },
+         # Inside a `fieldOrder` element. Recognizes only
+         # the `field` element, and returns to the `:base` state
+         # when it encounters `</fieldOrder>`.
+         :field_order => {
+           :start_element => lambda do |name, attributes|
+             if name == "field"
+               @state = :field_order_field
+             end
+           end,
+           :end_element => lambda do |name|
+             if name == "fieldOrder"
+               @state = :base
+               Fiber.yield(@fields)
+             end
+           end
+         },
+         # When the parser in `:field_order` state encounters
+         # a `field` element, it jumps to this state to record it.
+         # When `</field>` is encountered, jumps back to `:field_order`.
+         :field_order_field => {
+           :characters => lambda do |text|
+             @fields << text.strip
+           end,
+           :end_element => lambda do |name|
+             if name == "field"
+               @state = :field_order
+             end
+           end
+         },
+         # When the parser has hit the `result` element, it jumps here.
+         # When this state hits `</result>`, it calls `Fiber.yield` to
+         # send the completed result back, and, when the fiber is
+         # resumed, jumps back to the `:base` state.
+         :result => {
+           :start_element => lambda do |name, attributes|
+             if name == "field"
+               @current_field = attributes["k"]
+               @current_value = nil
+             elsif name == "text" || name == "v"
+               @state = :field_values
+               @current_scratch = ""
+             end
+           end,
+           :end_element => lambda do |name|
+             if name == "result"
+               Fiber.yield @current_result
+               @current_result = nil
+               @current_offset = nil
+               @state = :base
+             elsif name == "field"
+               if @current_result.has_key?(@current_field)
+                 if @current_result[@current_field].is_a?(Array)
+                   @current_result[@current_field] << @current_value
+                 elsif @current_result[@current_field] != nil
+                   @current_result[@current_field] =
+                     [@current_result[@current_field], @current_value]
+                 end
+               else
+                 @current_result[@current_field] = @current_value
+               end
+               @current_field = nil
+               @current_value = nil
+             end
+           end
+         },
+         # Parse the values inside a results field.
+         :field_values => {
+           :end_element => lambda do |name|
+             if name == "text" || name == "v"
+               if @current_value == nil
+                 @current_value = @current_scratch
+               elsif @current_value.is_a?(Array)
+                 @current_value << @current_scratch
+               else
+                 @current_value = [@current_value, @current_scratch]
+               end
+
+               @current_scratch = nil
+               @state = :result
+             elsif name == "sg"
+               # <sg> is emitted to delimit text that should be displayed
+               # highlighted. We preserve it in field values.
+               @current_scratch << "</sg>"
+             end
+           end,
+           :start_element => lambda do |name, attributes|
+             if name == "sg"
+               s = ["sg"] + attributes.sort.map do |entry|
+                 key, value = entry
+                 "#{key}=\"#{value}\""
+               end
+               text = "<" + s.join(" ") + ">"
+               @current_scratch << text
+             end
+           end,
+           :characters => lambda do |text|
+             @current_scratch << text
+           end
+         }
+       }
+     end
+
392
+ def start_element(name, attributes)
393
+ # attributes is an association list. Turn it into a hash
394
+ # that tag_start can use.
395
+ attribute_dict = {}
396
+ attributes.each do |attribute|
397
+ key = attribute.localname
398
+ value = attribute.value
399
+ attribute_dict[key] = value
400
+ end
401
+
402
+ tag_start(name, attribute_dict)
403
+ end
404
+
405
+ def start_element_namespace(name, attributes=[], prefix=nil, uri=nil, ns=[])
406
+ start_element(name, attributes)
407
+ end
408
+
409
+ def end_element(name)
410
+ tag_end(name)
411
+ end
412
+
413
+ def end_element_namespace(name, prefix = nil, uri = nil)
414
+ end_element(name)
415
+ end
416
+
417
+ def characters(text)
418
+ text(text)
419
+ end
420
+
421
+ # REXML methods - all dispatch is done here
422
+ def tag_start(name, attributes)
423
+ # attributes is a hash.
424
+ if @states[@state].has_key?(:start_element)
425
+ @states[@state][:start_element].call(name, attributes)
426
+ end
427
+ end
428
+
429
+ def tag_end(name)
430
+ if @states[@state].has_key?(:end_element)
431
+ @states[@state][:end_element].call(name)
432
+ end
433
+ end
434
+
435
+ def text(text)
436
+ if @states[@state].has_key?(:characters)
437
+ @states[@state][:characters].call(text)
438
+ end
439
+ end
440
+
441
+ # Unused methods in Nokogiri
442
+ def cdata_block(string) end
443
+ def comment(string) end
444
+ def end_document() end
445
+ def error(string) end
446
+ def start_document() end
447
+ def warning(string) end
448
+ # xmldecl declared in REXML list below.
449
+
450
+ # Unused methods in REXML
451
+ def attlistdecl(element_name, attributes, raw_content) end
452
+ def cdata(content) end
453
+ def comment(comment) end
454
+ def doctype(name, pub_sys, long_name, uri) end
455
+ def doctype_end() end
456
+ def elementdecl(content) end
457
+ def entity(content) end
458
+ def entitydecl(content) end
459
+ def instruction(name, instruction) end
460
+ def notationdecl(content) end
461
+ def xmldecl(version, encoding, standalone) end
462
+ end
463
+
+   ##
+   # Version of +ResultsReader+ that accepts an external parsing state.
+   #
+   # +ResultsReader+ sets up its own +Fiber+ for doing SAX parsing of the
+   # XML, but for the +MultiResultsReader+, we want to share a single fiber
+   # among all the results readers that we create. +PuppetResultsReader+
+   # takes the fiber, +is_preview+, and +fields+ information in its
+   # constructor and then exposes the same methods as +ResultsReader+.
+   #
+   # You should never create an instance of +PuppetResultsReader+ by hand. It
+   # will be passed back from iterating over a +MultiResultsReader+.
+   #
+   class PuppetResultsReader < ResultsReader
+     def initialize(fiber, is_preview, fields)
+       @valid = true
+       @iteration_fiber = fiber
+       @is_preview = is_preview
+       @fields = fields
+     end
+
+     def each()
+       if !@valid
+         raise StandardError.new("Cannot iterate on ResultsReaders out of order.")
+       else
+         super()
+       end
+     end
+
+     def invalidate()
+       @valid = false
+     end
+   end
+
+   ##
+   # Parser for the XML results sets returned by blocking export jobs.
+   #
+   # The methods +create_export+ and +create_stream+ on +Jobs+ and +Service+
+   # do not return data in quite the same format as other search jobs in
+   # Splunk. They return a sequence of preview results sets, and then (if
+   # they are not real time searches) a final results set.
+   #
+   # +MultiResultsReader+ takes the stream returned by such a call, and
+   # provides iteration over each results set, or access to only the final,
+   # non-preview results set.
+   #
+   # *Examples*:
+   #
+   #     require 'splunk-sdk-ruby'
+   #
+   #     service = Splunk::connect(:username => "admin", :password => "changeme")
+   #
+   #     stream = service.jobs.create_export("search index=_internal | head 10")
+   #
+   #     readers = MultiResultsReader.new(stream)
+   #     readers.each do |reader|
+   #       puts "New result set (preview=#{reader.is_preview?})"
+   #       reader.each do |result|
+   #         puts result
+   #       end
+   #     end
+   #
+   #     # Alternatively:
+   #     reader = readers.final_results()
+   #     reader.each do |result|
+   #       puts result
+   #     end
+   #
+   class MultiResultsReader
+     include Enumerable
+
+     def initialize(text_or_stream)
+       if text_or_stream.nil?
+         stream = StringIO.new("")
+       elsif !text_or_stream.respond_to?(:read)
+         # Strip because the XML libraries can be pissy.
+         stream = StringIO.new(text_or_stream.strip)
+       else
+         stream = text_or_stream
+       end
+
+       listener = ResultsListener.new()
+       @iteration_fiber = Fiber.new do
+         if $splunk_xml_library == :nokogiri
+           parser = Nokogiri::XML::SAX::Parser.new(listener)
+           # Nokogiri requires a unique root element, which we are
+           # fabricating here, while REXML is fine with multiple root
+           # elements in a stream.
+           edited_stream = ConcatenatedStream.new(
+             StringIO.new("<fake-root-element>"),
+             XMLDTDFilter.new(stream),
+             StringIO.new("</fake-root-element>")
+           )
+           parser.parse(edited_stream)
+         else # Use REXML
+           REXML::Document.parse_stream(stream, listener)
+         end
+       end
+     end
+
+     def each()
+       enum = Enumerator.new() do |yielder|
+         if !@iteration_fiber.nil? # Handle the case of empty files
+           begin
+             while true
+               is_preview = @iteration_fiber.resume
+               fields = @iteration_fiber.resume
+               reader = PuppetResultsReader.new(@iteration_fiber,
+                                                is_preview, fields)
+               yielder << reader
+               # Finish extracting any events that the user didn't read.
+               # Otherwise the next results reader will start in the middle
+               # of the previous results set.
+               reader.skip_remaining_results()
+               reader.invalidate()
+             end
+           rescue FiberError
+             # After the last result element, the next evaluation of
+             # 'is_preview = @iteration_fiber.resume' above will throw a
+             # +FiberError+ when the fiber terminates without yielding any
+             # additional values. We handle the control flow in this way so
+             # that the final code in the fiber to handle cleanup always
+             # gets run.
+           end
+         end
+       end
+
+       if block_given? # Apply the enumerator to a block if we have one
+         enum.each() { |e| yield e }
+       else
+         enum # Otherwise return the enumerator itself
+       end
+     end
+
+     ##
+     # Returns a +ResultsReader+ over only the non-preview results.
+     #
+     # If you run this method against a real time search job, which only ever
+     # produces preview results, it will loop forever. If you run it against
+     # a non-reporting search (that is, one that filters and extracts fields
+     # from events, but doesn't calculate a whole new set of events), you
+     # will get only the first few results, since you should be using the
+     # normal +ResultsReader+, not +MultiResultsReader+, in that case.
+     #
+     def final_results()
+       each do |reader|
+         if reader.is_preview?
+           reader.skip_remaining_results()
+         else
+           return reader
+         end
+       end
+     end
+   end
+
+
+   ##
+   # Stream transformer that filters out XML DTD definitions.
+   #
+   # +XMLDTDFilter+ takes anything between +<?+ and +>+ to be a DTD. It does
+   # no escaping of quoted text.
+   #
+   class XMLDTDFilter < IO
+     def initialize(stream)
+       @stream = stream
+       @peeked_char = nil
+     end
+
+     def close()
+       @stream.close()
+     end
+
+     def read(n=nil)
+       response = ""
+
+       while n.nil? or n > 0
+         # First use any character we have already peeked at.
+         if !@peeked_char.nil?
+           response << @peeked_char
+           @peeked_char = nil
+           if !n.nil?
+             n -= 1
+           end
+           next
+         end
+
+         c = @stream.read(1)
+         if c.nil? # We've reached the end of the stream
+           break
+         elsif c == "<" # We might have a DTD definition
+           d = @stream.read(1) || ""
+           if d == "?" # It's a DTD. Skip until we've consumed a >.
+             while true
+               q = @stream.read(1)
+               if q == ">"
+                 break
+               end
+             end
+           else # It's not a DTD. Push the character we read into lookahead.
+             @peeked_char = d
+             response << c
+             if !n.nil?
+               n -= 1
+             end
+           end
+         else # No special behavior
+           response << c
+           if !n.nil?
+             n -= 1
+           end
+         end
+       end
+       return response
+     end
+   end
+
+   ##
+   # A stream which concatenates all the streams passed to it.
+   #
+   class ConcatenatedStream < IO
+     def initialize(*streams)
+       @streams = streams
+     end
+
+     def close()
+       @streams.each do |stream|
+         stream.close()
+       end
+     end
+
+     def read(n=nil)
+       response = ""
+       while n.nil? or n > 0
+         if @streams.empty? # No streams left
+           break
+         else # We have streams left.
+           chunk = @streams[0].read(n) || ""
+           if n.nil? or chunk.length() < n
+             # This stream is exhausted; move on to the next one.
+             @streams.shift()
+           end
+           if !n.nil?
+             n -= chunk.length()
+           end
+
+           response << chunk
+         end
+       end
+       if response == ""
+         return nil
+       else
+         return response
+       end
+     end
+   end
+ end
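As a closing illustration, here is a small sketch of how these two stream helpers compose. It mirrors what +MultiResultsReader+ does internally to hand Nokogiri a single root element; the input string is made up for the example.

    require 'stringio'
    require 'splunk-sdk-ruby'

    inner = StringIO.new('<?xml version="1.0"?><results/>')
    stream = Splunk::ConcatenatedStream.new(
      StringIO.new("<fake-root-element>"),
      Splunk::XMLDTDFilter.new(inner), # Strips the <?xml ...?> declaration.
      StringIO.new("</fake-root-element>")
    )
    puts stream.read()
    # => <fake-root-element><results/></fake-root-element>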