splunk-sdk-ruby 0.1.0

Files changed (81)
  1. data/CHANGELOG.md +160 -0
  2. data/Gemfile +8 -0
  3. data/LICENSE +177 -0
  4. data/README.md +310 -0
  5. data/Rakefile +40 -0
  6. data/examples/1_connect.rb +51 -0
  7. data/examples/2_manage.rb +103 -0
  8. data/examples/3_blocking_searches.rb +82 -0
  9. data/examples/4_asynchronous_searches.rb +79 -0
  10. data/examples/5_stream_data_to_splunk.rb +79 -0
  11. data/lib/splunk-sdk-ruby.rb +47 -0
  12. data/lib/splunk-sdk-ruby/ambiguous_entity_reference.rb +28 -0
  13. data/lib/splunk-sdk-ruby/atomfeed.rb +323 -0
  14. data/lib/splunk-sdk-ruby/collection.rb +417 -0
  15. data/lib/splunk-sdk-ruby/collection/apps.rb +35 -0
  16. data/lib/splunk-sdk-ruby/collection/case_insensitive_collection.rb +58 -0
  17. data/lib/splunk-sdk-ruby/collection/configuration_file.rb +50 -0
  18. data/lib/splunk-sdk-ruby/collection/configurations.rb +80 -0
  19. data/lib/splunk-sdk-ruby/collection/jobs.rb +136 -0
  20. data/lib/splunk-sdk-ruby/collection/messages.rb +51 -0
  21. data/lib/splunk-sdk-ruby/context.rb +522 -0
  22. data/lib/splunk-sdk-ruby/entity.rb +260 -0
  23. data/lib/splunk-sdk-ruby/entity/index.rb +191 -0
  24. data/lib/splunk-sdk-ruby/entity/job.rb +339 -0
  25. data/lib/splunk-sdk-ruby/entity/message.rb +36 -0
  26. data/lib/splunk-sdk-ruby/entity/saved_search.rb +71 -0
  27. data/lib/splunk-sdk-ruby/entity/stanza.rb +45 -0
  28. data/lib/splunk-sdk-ruby/entity_not_ready.rb +26 -0
  29. data/lib/splunk-sdk-ruby/illegal_operation.rb +27 -0
  30. data/lib/splunk-sdk-ruby/namespace.rb +239 -0
  31. data/lib/splunk-sdk-ruby/resultsreader.rb +716 -0
  32. data/lib/splunk-sdk-ruby/service.rb +339 -0
  33. data/lib/splunk-sdk-ruby/splunk_http_error.rb +49 -0
  34. data/lib/splunk-sdk-ruby/synonyms.rb +50 -0
  35. data/lib/splunk-sdk-ruby/version.rb +27 -0
  36. data/lib/splunk-sdk-ruby/xml_shim.rb +117 -0
  37. data/splunk-sdk-ruby.gemspec +27 -0
  38. data/test/atom_test_data.rb +472 -0
  39. data/test/data/atom/atom_feed_with_message.xml +19 -0
  40. data/test/data/atom/atom_with_feed.xml +99 -0
  41. data/test/data/atom/atom_with_several_entries.xml +101 -0
  42. data/test/data/atom/atom_with_simple_entries.xml +30 -0
  43. data/test/data/atom/atom_without_feed.xml +248 -0
  44. data/test/data/export/4.2.5/export_results.xml +88 -0
  45. data/test/data/export/4.3.5/export_results.xml +87 -0
  46. data/test/data/export/5.0.1/export_results.xml +78 -0
  47. data/test/data/export/5.0.1/nonreporting.xml +232 -0
  48. data/test/data/results/4.2.5/results-empty.xml +0 -0
  49. data/test/data/results/4.2.5/results-preview.xml +255 -0
  50. data/test/data/results/4.2.5/results.xml +336 -0
  51. data/test/data/results/4.3.5/results-empty.xml +0 -0
  52. data/test/data/results/4.3.5/results-preview.xml +1057 -0
  53. data/test/data/results/4.3.5/results.xml +626 -0
  54. data/test/data/results/5.0.2/results-empty.xml +1 -0
  55. data/test/data/results/5.0.2/results-empty_preview.xml +1 -0
  56. data/test/data/results/5.0.2/results-preview.xml +448 -0
  57. data/test/data/results/5.0.2/results.xml +501 -0
  58. data/test/export_test_data.json +360 -0
  59. data/test/resultsreader_test_data.json +1119 -0
  60. data/test/services.server.info.xml +43 -0
  61. data/test/services.xml +111 -0
  62. data/test/test_atomfeed.rb +71 -0
  63. data/test/test_collection.rb +278 -0
  64. data/test/test_configuration_file.rb +124 -0
  65. data/test/test_context.rb +119 -0
  66. data/test/test_entity.rb +95 -0
  67. data/test/test_helper.rb +250 -0
  68. data/test/test_http_error.rb +52 -0
  69. data/test/test_index.rb +91 -0
  70. data/test/test_jobs.rb +319 -0
  71. data/test/test_messages.rb +17 -0
  72. data/test/test_namespace.rb +188 -0
  73. data/test/test_restarts.rb +49 -0
  74. data/test/test_resultsreader.rb +106 -0
  75. data/test/test_roles.rb +41 -0
  76. data/test/test_saved_searches.rb +119 -0
  77. data/test/test_service.rb +65 -0
  78. data/test/test_users.rb +33 -0
  79. data/test/test_xml_shim.rb +28 -0
  80. data/test/testfile.txt +1 -0
  81. metadata +200 -0
data/lib/splunk-sdk-ruby/entity_not_ready.rb
@@ -0,0 +1,26 @@
+ #--
+ # Copyright 2011-2013 Splunk, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"): you may
+ # not use this file except in compliance with the License. You may obtain
+ # a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ # License for the specific language governing permissions and limitations
+ # under the License.
+ #++
+
+ module Splunk
+   ##
+   # Exception thrown when fetching from an entity returns HTTP code 204.
+   #
+   # This primarily comes up with jobs. When a job is not yet ready, fetching
+   # it from the server returns code 204, and we want to handle it specially.
+   #
+   class EntityNotReady < StandardError
+   end
+ end
data/lib/splunk-sdk-ruby/illegal_operation.rb
@@ -0,0 +1,27 @@
+ #--
+ # Copyright 2011-2013 Splunk, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"): you may
+ # not use this file except in compliance with the License. You may obtain
+ # a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ # License for the specific language governing permissions and limitations
+ # under the License.
+ #++
+
+ module Splunk
+   ##
+   # Exception thrown when a call is known statically to fail.
+   #
+   # +IllegalOperation+ is meant to be thrown when a call can be statically
+   # inferred to fail, such as trying to delete an index on versions of Splunk
+   # before 5.0. It implies that no round trips to the server were made.
+   #
+   class IllegalOperation < StandardError
+   end
+ end
data/lib/splunk-sdk-ruby/namespace.rb
@@ -0,0 +1,239 @@
+ #--
+ # Copyright 2011-2013 Splunk, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"): you may
+ # not use this file except in compliance with the License. You may obtain
+ # a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ # License for the specific language governing permissions and limitations
+ # under the License.
+ #++
+
+ ##
+ # Ruby representations of Splunk namespaces.
+ #
+ # Splunk's namespaces give access paths to objects. Each application, user,
+ # search job, saved search, or other entity in Splunk has a namespace, and
+ # when you access an entity via the REST API, you include a namespace in your
+ # query. What entities are visible to your query depends on the namespace you
+ # use for the query.
+ #
+ # Some namespaces can contain wildcards or default values filled in by Splunk.
+ # We call such namespaces _wildcard_, since they cannot be the namespace of an
+ # entity, only of a query. Namespaces that can be the namespace of an entity
+ # are called _exact_.
+ #
+ # We distinguish six kinds of namespace, each of which is represented by a
+ # separate class:
+ #
+ # * +DefaultNamespace+, used for queries where you want whatever would be
+ #   the default for the user you are logged into Splunk as. It is also the
+ #   namespace of applications (which themselves determine namespaces, and so
+ #   need a special one).
+ # * +GlobalNamespace+, which makes an entity visible anywhere in Splunk.
+ # * +SystemNamespace+, which is used for entities like users and roles that
+ #   are part of Splunk. Entities in the system namespace are visible anywhere
+ #   in Splunk.
+ # * +AppNamespace+, one per application installed in the Splunk instance.
+ # * +AppReferenceNamespace+, which is the namespace that applications
+ #   themselves live in. It differs from +DefaultNamespace+ only in that it is
+ #   an exact namespace.
+ # * The user namespaces, which are defined by a user _and_ an application.
+ #
+ # In the user and application namespaces, you can use +"-"+ as a wildcard
+ # in place of an actual user or application name.
+ #
+ # These are all represented in the Ruby SDK by correspondingly named classes:
+ # +DefaultNamespace+, +GlobalNamespace+, +SystemNamespace+, +AppNamespace+,
+ # +AppReferenceNamespace+, and +UserNamespace+. Each of these includes the
+ # empty mixin +Namespace+, so an instance of any of them will respond to
+ # +#is_a?(Namespace)+ with +true+.
+ #
+ # Some of these classes are singletons, some aren't, and to avoid confusion
+ # or having to remember which is which, you should create namespaces with the
+ # +namespace+ function.
+ #
+ # Which namespace the +eai:acl+ fields of an entity map to is determined by
+ # what the path to that entity should be. In the end, a namespace is a way to
+ # calculate the initial path to access an entity. For example, applications
+ # all have +sharing="app"+ and +app=""+ in their +eai:acl+ fields, but their
+ # path uses the +services/+ prefix, so that particular combination, despite
+ # what it appears to be, is actually an +AppReferenceNamespace+.
+ #
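+ # For example (an illustrative sketch; "boris" and "search" are made-up
+ # names), using the +namespace+ function described below:
+ #
+ #     # An exact namespace, naming a specific user and application.
+ #     ns = Splunk::namespace(:sharing => "user",
+ #                            :owner => "boris", :app => "search")
+ #     ns.is_exact?        # ==> true
+ #
+ #     # A wildcard namespace: "-" matches any application, so this can
+ #     # appear only in queries, never as the namespace of an entity.
+ #     ns = Splunk::namespace(:sharing => "app", :app => "-")
+ #     ns.is_exact?        # ==> false
+ #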
+
+ require 'singleton'
+
+ module Splunk
+   ##
+   # Convert a hash of +eai:acl+ fields from Splunk's REST API into a
+   # namespace.
+   #
+   # _eai_acl_ should be a hash containing at least the key +"sharing"+, and,
+   # depending on the value associated with +"sharing"+, possibly keys
+   # +"app"+ and +"owner"+.
+   #
+   # Returns: a +Namespace+.
+   #
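+   # *Example* (illustrative; the hash mimics the +eai:acl+ fields of an
+   # entity owned by user "admin" in the "search" app):
+   #
+   #     acl = {"sharing" => "user", "app" => "search", "owner" => "admin"}
+   #     ns = Splunk::eai_acl_to_namespace(acl)
+   #     # ns is now the same namespace as
+   #     # namespace(:sharing => "user", :app => "search", :owner => "admin")
+   #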
+   def self.eai_acl_to_namespace(eai_acl)
+     namespace(:sharing => eai_acl["sharing"],
+               :app => eai_acl["app"],
+               :owner => eai_acl["owner"])
+   end
+
+   ##
+   # Create a +Namespace+.
+   #
+   # +namespace+ takes a hash of arguments, recognizing the keys +:sharing+,
+   # +:owner+, and +:app+. Among them, +:sharing+ is required, and depending
+   # on its value, the others may be required or not.
+   #
+   # +:sharing+ determines what kind of namespace is produced. It can have the
+   # values +"default"+, +"global"+, +"system"+, +"user"+, or +"app"+.
+   #
+   # If +:sharing+ is +"default"+, +"global"+, or +"system"+, the other two
+   # arguments are ignored. If +:sharing+ is +"app"+, only +:app+ is used,
+   # specifying the application of the namespace. If +:sharing+ is +"user"+,
+   # then both the +:app+ and +:owner+ arguments are used.
+   #
+   # If +:sharing+ is +"app"+ but +:app+ is +""+, it returns an
+   # +AppReferenceNamespace+.
+   #
+   # Returns: a +Namespace+.
+   #
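+   # *Example* (a sketch of how arguments map to the namespace classes):
+   #
+   #     Splunk::namespace(:sharing => "system")
+   #     # ==> SystemNamespace.instance
+   #     Splunk::namespace(:sharing => "app", :app => "search")
+   #     # ==> AppNamespace.new("search")
+   #     Splunk::namespace(:sharing => "app", :app => "")
+   #     # ==> AppReferenceNamespace.instance
+   #     Splunk::namespace(:sharing => "user", :owner => "boris",
+   #                       :app => "search")
+   #     # ==> UserNamespace.new("boris", "search")
+   #     Splunk::namespace(:sharing => "user")
+   #     # ==> raises ArgumentError, since user namespaces need both
+   #     #     :owner and :app
+   #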
+   def self.namespace(args)
+     sharing = args.fetch(:sharing, "default")
+     owner = args.fetch(:owner, nil)
+     app = args.fetch(:app, nil)
+
+     if sharing == "system"
+       return SystemNamespace.instance
+     elsif sharing == "global"
+       return GlobalNamespace.instance
+     elsif sharing == "user"
+       if owner.nil? or owner == ""
+         raise ArgumentError.new("Must provide an owner for user namespaces.")
+       elsif app.nil? or app == ""
+         raise ArgumentError.new("Must provide an app for user namespaces.")
+       else
+         return UserNamespace.new(owner, app)
+       end
+     elsif sharing == "app"
+       if app.nil?
+         raise ArgumentError.new("Must specify an application for application sharing")
+       elsif app == ""
+         return AppReferenceNamespace.instance
+       else
+         return AppNamespace.new(app)
+       end
+     elsif sharing == "default"
+       return DefaultNamespace.instance
+     else
+       raise ArgumentError.new("Unknown sharing value: #{sharing}")
+     end
+   end
+
+   ##
+   # A mixin that fills the role of an abstract base class.
+   #
+   # Namespaces have two methods: +is_exact?+ and +to_path_fragment+, and
+   # can be compared for equality.
+   #
+   module Namespace
+     ##
+     # Is this an exact namespace?
+     #
+     # Returns: +true+ or +false+.
+     #
+     def is_exact?() end
+
+     ##
+     # Returns the URL prefix corresponding to this namespace.
+     #
+     # The prefix is returned as a list of strings. The strings
+     # are _not_ URL encoded. You need to URL encode them when
+     # you construct your URL.
+     #
+     # Returns: an +Array+ of +Strings+.
+     #
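+     # For example (illustrative), the path fragment of a user namespace is
+     # built from its owner and application:
+     #
+     #     ns = Splunk::namespace(:sharing => "user",
+     #                            :owner => "boris", :app => "search")
+     #     ns.to_path_fragment() # ==> ["servicesNS", "boris", "search"]
+     #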
+     def to_path_fragment() end
+   end
+
+   class GlobalNamespace # :nodoc:
+     include Singleton
+     include Namespace
+     def is_exact?() true end
+     def to_path_fragment() ["servicesNS", "nobody", "system"] end
+   end
+
+   class SystemNamespace # :nodoc:
+     include Singleton
+     include Namespace
+     def is_exact?() true end
+     def to_path_fragment() ["servicesNS", "nobody", "system"] end
+   end
+
+   class DefaultNamespace # :nodoc:
+     include Singleton
+     include Namespace
+     # A services/ namespace always uses the current user
+     # and current app, neither of which are wildcards, so this
+     # namespace is guaranteed to be exact.
+     def is_exact?() true end
+     def to_path_fragment() ["services"] end
+   end
+
+   class AppReferenceNamespace # :nodoc:
+     include Singleton
+     include Namespace
+     def is_exact?() true end
+     def to_path_fragment() ["services"] end
+   end
+
+   class AppNamespace # :nodoc:
+     include Namespace
+     attr_reader :app
+
+     def initialize(app)
+       @app = app
+     end
+
+     def ==(other)
+       other.is_a?(AppNamespace) && @app == other.app
+     end
+
+     def is_exact?()
+       @app != "-"
+     end
+
+     def to_path_fragment()
+       ["servicesNS", "nobody", @app]
+     end
+   end
+
+   class UserNamespace # :nodoc:
+     include Namespace
+     attr_reader :user, :app
+
+     def initialize(user, app)
+       @user = user
+       @app = app
+     end
+
+     def ==(other)
+       other.is_a?(UserNamespace) && @app == other.app &&
+         @user == other.user
+     end
+
+     def is_exact?()
+       (@app != "-") && (@user != "-")
+     end
+
+     def to_path_fragment()
+       ["servicesNS", @user, @app]
+     end
+   end
+ end
data/lib/splunk-sdk-ruby/resultsreader.rb
@@ -0,0 +1,716 @@
+ #--
+ # Copyright 2011-2013 Splunk, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License"): you may
+ # not use this file except in compliance with the License. You may obtain
+ # a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ # License for the specific language governing permissions and limitations
+ # under the License.
+ #++
+
+ ##
+ # +resultsreader.rb+ provides classes to incrementally parse the XML output
+ # from Splunk search jobs. For most search jobs you will want +ResultsReader+,
+ # which handles a single results set. However, running a blocking export job
+ # from the +search/jobs/export+ endpoint sends back a stream of results sets,
+ # all but the last of which are previews. In that case, you should use
+ # +MultiResultsReader+, which lets you iterate over the results sets.
+ #
+ # By default, +ResultsReader+ will try to use Nokogiri for XML parsing. If
+ # Nokogiri isn't available, it will fall back to REXML, which ships with Ruby
+ # 1.9. See +xml_shim.rb+ for how to alter this behavior.
+ #
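+ # For example, to force REXML (via the +require_xml_library+ helper that
+ # +xml_shim.rb+ provides):
+ #
+ #     require 'splunk-sdk-ruby'
+ #
+ #     Splunk::require_xml_library(:rexml)
+ #     # ResultsReaders created from here on will use REXML.
+ #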
+
+ #--
+ # There are two basic designs we could have used for handling the
+ # search/jobs/export output. We could either have the user call
+ # +ResultsReader#each+ multiple times, each time going through the next
+ # results set, or we could do what we have here and have an outer iterator
+ # that yields distinct +ResultsReader+ objects for each results set.
+ #
+ # The outer iterator is syntactically somewhat clearer, but you must
+ # invalidate the previous +ResultsReader+ objects before yielding a new one,
+ # so that code like
+ #
+ #     readers = []
+ #     outer_iter.each do |reader|
+ #       readers << reader
+ #     end
+ #     readers[2].each do |result|
+ #       puts result
+ #     end
+ #
+ # will throw an error on the second +each+. The right behavior is to throw an
+ # exception in the +ResultsReader+'s +each+ if it is invoked out of order.
+ # This problem doesn't affect the all-in-one design.
+ #
+ # However, in the all-in-one design, it is impossible to set the +is_preview+
+ # and +fields+ instance variables of the +ResultsReader+ correctly between
+ # invocations of +each+. Consider code such as
+ #
+ #     while reader.is_preview
+ #       reader.each do |result|
+ #         ...
+ #       end
+ #     end
+ #
+ # If the +...+ contains a break, then there is no way to set +is_preview+
+ # correctly before the next iteration of the while loop. This problem does
+ # not affect the outer iterator design, and Fred Ross and Yunxin Wu were not
+ # able to come up with a way to make it work in the all-in-one design, so the
+ # SDK uses the outer iterator design.
+ #++
+
+ require 'stringio'
+
+ require_relative 'xml_shim'
+ require_relative 'collection/jobs' # To access ExportStream
+
+ module Splunk
+   ##
+   # +ResultsReader+ parses Splunk's XML format for results into Ruby objects.
+   #
+   # You can use both Nokogiri and REXML. By default, the +ResultsReader+ will
+   # try to use Nokogiri, and if it is not available will fall back to REXML.
+   # If you want other behavior, see +xml_shim.rb+ for how to set the XML
+   # library.
+   #
+   # +ResultsReader+ is an +Enumerable+, so it has methods such as +each+ and
+   # +each_with_index+. However, since it is a stream parser, once you have
+   # iterated through it, it will thereafter be empty.
+   #
+   # Do not use +ResultsReader+ with the results of the +create_export+ or
+   # +create_stream+ methods on +Service+ or +Jobs+. These methods use
+   # endpoints which return a different set of data structures. Use
+   # +MultiResultsReader+ instead for those cases. If you do use
+   # +ResultsReader+, it will return a concatenation of all non-preview events
+   # in the stream, but that behavior should be considered deprecated, and
+   # will result in a warning.
+   #
+   # The +ResultsReader+ object has two additional methods:
+   #
+   # * +is_preview?+ returns a Boolean value that indicates whether these
+   #   results are a preview from an unfinished search or not.
+   # * +fields+ returns an array of all the fields that may appear in a result
+   #   in this set, in the order they should be displayed (if you're going
+   #   to make a table or the like).
+   #
+   # *Example*:
+   #
+   #     require 'splunk-sdk-ruby'
+   #
+   #     service = Splunk::connect(:username => "admin", :password => "changeme")
+   #
+   #     stream = service.jobs.create_oneshot("search index=_internal | head 10")
+   #     reader = ResultsReader.new(stream)
+   #     puts reader.is_preview?
+   #     # Prints: false
+   #     reader.each do |result|
+   #       puts result
+   #     end
+   #     # Prints a sequence of Hashes containing events.
+   #
+   class ResultsReader
+     include Enumerable
+
+     ##
+     # Are the results in this reader a preview from an unfinished search?
+     #
+     # Returns: +true+ or +false+, or +nil+ if the stream is empty.
+     #
+     def is_preview?
+       return @is_preview
+     end
+
+     ##
+     # An +Array+ of all the fields that may appear in each result.
+     #
+     # Note that any given result will contain a subset of these fields.
+     #
+     # Returns: an +Array+ of +Strings+.
+     #
+     attr_reader :fields
+
+     def initialize(text_or_stream)
+       if text_or_stream.nil?
+         stream = StringIO.new("")
+       elsif text_or_stream.is_a?(ExportStream)
+         # The sensible behavior on streams from the export endpoints is to
+         # skip all preview results and concatenate all others. The export
+         # functions wrap their streams in ExportStream to mark that they need
+         # this special handling.
+         @is_export = true
+         @reader = MultiResultsReader.new(text_or_stream).final_results()
+         @is_preview = @reader.is_preview?
+         @fields = @reader.fields
+         return
+       elsif !text_or_stream.respond_to?(:read)
+         # Strip because the XML libraries can be pissy.
+         stream = StringIO.new(text_or_stream.strip)
+       else
+         stream = text_or_stream
+       end
+
+       if stream.eof?
+         @is_preview = nil
+         @fields = []
+       else
+         # We use a SAX parser. +listener+ is the event handler, but a SAX
+         # parser won't usually transfer control during parsing.
+         # To incrementally return results as we parse, we have to put
+         # the parser into a +Fiber+ from which we can yield.
+         listener = ResultsListener.new()
+         @iteration_fiber = Fiber.new do
+           if $splunk_xml_library == :nokogiri
+             parser = Nokogiri::XML::SAX::Parser.new(listener)
+             parser.parse(stream)
+           else # Use REXML
+             REXML::Document.parse_stream(stream, listener)
+           end
+         end
+
+         @is_preview = @iteration_fiber.resume
+         @fields = @iteration_fiber.resume
+         @reached_end = false
+       end
+     end
+
+     def each()
+       # If we have been passed a stream from an export endpoint, it was
+       # marked as such in the constructor, and we handle it differently.
+       if @is_export
+         warn "[DEPRECATED] Do not use ResultsReader on the output of the " +
+              "export endpoint. Use MultiResultsReader instead."
+         enum = @reader.each()
+       else
+         enum = Enumerator.new() do |yielder|
+           if !@iteration_fiber.nil? # Handle the case of empty files
+             @reached_end = false
+             while true
+               result = @iteration_fiber.resume
+               break if result.nil? or result == :end_of_results_set
+               yielder << result
+             end
+           end
+           @reached_end = true
+         end
+       end
+
+       if block_given? # Apply the enumerator to a block if we have one
+         enum.each() { |e| yield e }
+       else
+         enum # Otherwise return the enumerator itself
+       end
+     end
+
+     ##
+     # Skips the rest of the events in this +ResultsReader+.
+     #
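+     # *Example* (a sketch: read only the first result, then drain the rest
+     # so an enclosing +MultiResultsReader+ stays consistent):
+     #
+     #     reader.each do |result|
+     #       puts result
+     #       break
+     #     end
+     #     reader.skip_remaining_results()
+     #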
+     def skip_remaining_results()
+       if !@reached_end
+         each() { |result| }
+       end
+     end
+   end
+
+   ##
+   # +ResultsListener+ is the SAX event handler for +ResultsReader+.
+   #
+   # The authors of Nokogiri decided to make their SAX interface
+   # slightly incompatible with that of REXML. For example, REXML
+   # uses +tag_start+ and passes attributes as a dictionary, while
+   # Nokogiri calls the same thing +start_element+, and passes
+   # attributes as an association list.
+   #
+   # This is a classic finite state machine parser. The +@states+ variable
+   # contains a hash whose keys are the states and whose values are hashes
+   # of functions giving the behavior of the state machine in that state.
+   # The actual methods on the object dispatch to these functions
+   # based upon the current state (as stored in +@state+).
+   #
+   # The parser initially runs until it has determined whether the results are
+   # a preview, then calls +Fiber.yield+ to return it. Then it continues and
+   # tries to yield a field order, and then any results. (It will always yield
+   # a field order, even if it is empty.) At the end of a results set, it
+   # yields +:end_of_results_set+.
+   #
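+   # For instance, for a hypothetical preview results set with fields +_time+
+   # and +host+ and two results, the enclosing fiber would yield, in order:
+   #
+   #     true                                 # is_preview
+   #     ["_time", "host"]                    # the field order
+   #     {"_time" => "...", "host" => "..."}  # one Hash per result
+   #     {"_time" => "...", "host" => "..."}
+   #     :end_of_results_set
+   #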
+   class ResultsListener # :nodoc:
+     def initialize()
+       # @fields holds the accumulated list of fields from the fieldOrder
+       # element. If there has been no accumulation, it is set to
+       # :no_fieldOrder_found. For empty results sets, there is often no
+       # fieldOrder element, but we still want to yield an empty Array at the
+       # right point, so if we reach the end of a results element and @fields
+       # is still :no_fieldOrder_found, we yield an empty array at that point.
+       @fields = :no_fieldOrder_found
+       @concatenate = false
+       @is_preview = nil
+       @state = :base
+       @states = {
+           # Toplevel state.
+           :base => {
+               :start_element => lambda do |name, attributes|
+                 if name == "results"
+                   if !@concatenate
+                     @is_preview = attributes["preview"] == "1"
+                     Fiber.yield(@is_preview)
+                   end
+                 elsif name == "fieldOrder"
+                   if !@concatenate
+                     @state = :field_order
+                     @fields = []
+                   end
+                 elsif name == "result"
+                   @state = :result
+                   @current_offset = Integer(attributes["offset"])
+                   @current_result = {}
+                 end
+               end,
+               :end_element => lambda do |name|
+                 if name == "results" and !@concatenate
+                   Fiber.yield([]) if @fields == :no_fieldOrder_found
+
+                   if !@is_preview # Start concatenating events
+                     @concatenate = true
+                   else
+                     # Reset the fieldOrder
+                     @fields = :no_fieldOrder_found
+                     Fiber.yield(:end_of_results_set)
+                   end
+                 end
+               end
+           },
+           # Inside a `fieldOrder` element. Recognizes only
+           # the `field` element, and returns to the `:base` state
+           # when it encounters `</fieldOrder>`.
+           :field_order => {
+               :start_element => lambda do |name, attributes|
+                 if name == "field"
+                   @state = :field_order_field
+                 end
+               end,
+               :end_element => lambda do |name|
+                 if name == "fieldOrder"
+                   @state = :base
+                   Fiber.yield(@fields)
+                 end
+               end
+           },
+           # When the parser in `:field_order` state encounters
+           # a `field` element, it jumps to this state to record it.
+           # When `</field>` is encountered, jumps back to `:field_order`.
+           :field_order_field => {
+               :characters => lambda do |text|
+                 @fields << text.strip
+               end,
+               :end_element => lambda do |name|
+                 if name == "field"
+                   @state = :field_order
+                 end
+               end
+           },
+           # When the parser has hit the `result` element, it jumps here.
+           # When this state hits `</result>`, it calls `Fiber.yield` to
+           # send the completed result back, and, when the fiber is
+           # resumed, jumps back to the `:base` state.
+           :result => {
+               :start_element => lambda do |name, attributes|
+                 if name == "field"
+                   @current_field = attributes["k"]
+                   @current_value = nil
+                 elsif name == "text" || name == "v"
+                   @state = :field_values
+                   @current_scratch = ""
+                 end
+               end,
+               :end_element => lambda do |name|
+                 if name == "result"
+                   Fiber.yield @current_result
+                   @current_result = nil
+                   @current_offset = nil
+                   @state = :base
+                 elsif name == "field"
+                   if @current_result.has_key?(@current_field)
+                     if @current_result[@current_field].is_a?(Array)
+                       @current_result[@current_field] << @current_value
+                     elsif @current_result[@current_field] != nil
+                       @current_result[@current_field] =
+                           [@current_result[@current_field], @current_value]
+                     end
+                   else
+                     @current_result[@current_field] = @current_value
+                   end
+                   @current_field = nil
+                   @current_value = nil
+                 end
+               end
+           },
+           # Parse the values inside a results field.
+           :field_values => {
+               :end_element => lambda do |name|
+                 if name == "text" || name == "v"
+                   if @current_value == nil
+                     @current_value = @current_scratch
+                   elsif @current_value.is_a?(Array)
+                     @current_value << @current_scratch
+                   else
+                     @current_value = [@current_value, @current_scratch]
+                   end
+
+                   @current_scratch = nil
+                   @state = :result
+                 elsif name == "sg"
+                   # <sg> is emitted to delimit text that should be displayed
+                   # highlighted. We preserve it in field values.
+                   @current_scratch << "</sg>"
+                 end
+               end,
+               :start_element => lambda do |name, attributes|
+                 if name == "sg"
+                   s = ["sg"] + attributes.sort.map do |entry|
+                     key, value = entry
+                     "#{key}=\"#{value}\""
+                   end
+                   text = "<" + s.join(" ") + ">"
+                   @current_scratch << text
+                 end
+               end,
+               :characters => lambda do |text|
+                 @current_scratch << text
+               end
+           }
+       }
+     end
+
+     # Nokogiri methods - all dispatch to the REXML methods.
+     def start_element(name, attributes)
+       # attributes is an association list. Turn it into a hash
+       # that tag_start can use.
+       attribute_dict = {}
+       attributes.each do |attribute|
+         key = attribute.localname
+         value = attribute.value
+         attribute_dict[key] = value
+       end
+
+       tag_start(name, attribute_dict)
+     end
+
+     def start_element_namespace(name, attributes=[], prefix=nil, uri=nil, ns=[])
+       start_element(name, attributes)
+     end
+
+     def end_element(name)
+       tag_end(name)
+     end
+
+     def end_element_namespace(name, prefix = nil, uri = nil)
+       end_element(name)
+     end
+
+     def characters(text)
+       text(text)
+     end
+
+     # REXML methods - all dispatch is done here.
+     def tag_start(name, attributes)
+       # attributes is a hash.
+       if @states[@state].has_key?(:start_element)
+         @states[@state][:start_element].call(name, attributes)
+       end
+     end
+
+     def tag_end(name)
+       if @states[@state].has_key?(:end_element)
+         @states[@state][:end_element].call(name)
+       end
+     end
+
+     def text(text)
+       if @states[@state].has_key?(:characters)
+         @states[@state][:characters].call(text)
+       end
+     end
+
+     # Unused methods in Nokogiri
+     def cdata_block(string) end
+     def comment(string) end
+     def end_document() end
+     def error(string) end
+     def start_document() end
+     def warning(string) end
+     # xmldecl declared in REXML list below.
+
+     # Unused methods in REXML
+     def attlistdecl(element_name, attributes, raw_content) end
+     def cdata(content) end
+     def doctype(name, pub_sys, long_name, uri) end
+     def doctype_end() end
+     def elementdecl(content) end
+     def entity(content) end
+     def entitydecl(content) end
+     def instruction(name, instruction) end
+     def notationdecl(content) end
+     def xmldecl(version, encoding, standalone) end
+   end
+
+
+   ##
+   # Version of +ResultsReader+ that accepts an external parsing state.
+   #
+   # +ResultsReader+ sets up its own Fiber for doing SAX parsing of the XML,
+   # but for the +MultiResultsReader+, we want to share a single fiber among
+   # all the results readers that we create. +PuppetResultsReader+ takes
+   # the fiber, is_preview, and fields information from its constructor
+   # and then exposes the same methods as +ResultsReader+.
+   #
+   # You should never create an instance of +PuppetResultsReader+ by hand. It
+   # will be passed back from iterating over a +MultiResultsReader+.
+   #
+   class PuppetResultsReader < ResultsReader
+     def initialize(fiber, is_preview, fields)
+       @valid = true
+       @iteration_fiber = fiber
+       @is_preview = is_preview
+       @fields = fields
+     end
+
+     def each()
+       if !@valid
+         raise StandardError.new("Cannot iterate on ResultsReaders out of order.")
+       else
+         super()
+       end
+     end
+
+     def invalidate()
+       @valid = false
+     end
+   end
+
+   ##
+   # Parser for the XML results sets returned by blocking export jobs.
+   #
+   # The methods +create_export+ and +create_stream+ on +Jobs+ and +Service+
+   # do not return data in quite the same format as other search jobs in
+   # Splunk. They will return a sequence of preview results sets, and then (if
+   # they are not real time searches) a final results set.
+   #
+   # +MultiResultsReader+ takes the stream returned by such a call, and
+   # provides iteration over each results set, or access to only the final,
+   # non-preview results set.
+   #
+   # *Examples*:
+   #
+   #     require 'splunk-sdk-ruby'
+   #
+   #     service = Splunk::connect(:username => "admin", :password => "changeme")
+   #
+   #     stream = service.jobs.create_export("search index=_internal | head 10")
+   #
+   #     readers = MultiResultsReader.new(stream)
+   #     readers.each do |reader|
+   #       puts "New result set (preview=#{reader.is_preview?})"
+   #       reader.each do |result|
+   #         puts result
+   #       end
+   #     end
+   #
+   #     # Alternately
+   #     reader = readers.final_results()
+   #     reader.each do |result|
+   #       puts result
+   #     end
+   #
+   class MultiResultsReader
+     include Enumerable
+
+     def initialize(text_or_stream)
+       if text_or_stream.nil?
+         stream = StringIO.new("")
+       elsif !text_or_stream.respond_to?(:read)
+         # Strip because the XML libraries can be pissy.
+         stream = StringIO.new(text_or_stream.strip)
+       else
+         stream = text_or_stream
+       end
+
+       listener = ResultsListener.new()
+       @iteration_fiber = Fiber.new do
+         if $splunk_xml_library == :nokogiri
+           parser = Nokogiri::XML::SAX::Parser.new(listener)
+           # Nokogiri requires a unique root element, which we are fabricating
+           # here, while REXML is fine with multiple root elements in a stream.
+           edited_stream = ConcatenatedStream.new(
+               StringIO.new("<fake-root-element>"),
+               XMLDTDFilter.new(stream),
+               StringIO.new("</fake-root-element>")
+           )
+           parser.parse(edited_stream)
+         else # Use REXML
+           REXML::Document.parse_stream(stream, listener)
+         end
+       end
+     end
+
+     def each()
+       enum = Enumerator.new() do |yielder|
+         if !@iteration_fiber.nil? # Handle the case of empty files
+           begin
+             while true
+               is_preview = @iteration_fiber.resume
+               fields = @iteration_fiber.resume
+               reader = PuppetResultsReader.new(@iteration_fiber,
+                                                is_preview, fields)
+               yielder << reader
+               # Finish extracting any events that the user didn't read.
+               # Otherwise the next results reader will start in the middle
+               # of the previous results set.
+               reader.skip_remaining_results()
+               reader.invalidate()
+             end
+           rescue FiberError
+             # After the last result element, the next evaluation of
+             # 'is_preview = @iteration_fiber.resume' above will throw a
+             # +FiberError+ when the fiber terminates without yielding any
+             # additional values. We handle the control flow in this way so
+             # that the final code in the fiber to handle cleanup always gets
+             # run.
+           end
+         end
+       end
+
+       if block_given? # Apply the enumerator to a block if we have one
+         enum.each() { |e| yield e }
+       else
+         enum # Otherwise return the enumerator itself
+       end
+     end
+
+     ##
+     # Returns a +ResultsReader+ over only the non-preview results.
+     #
+     # If you run this method against a real time search job, which only ever
+     # produces preview results, it will loop forever. If you run it against
+     # a non-reporting search (that is, one that filters and extracts fields
+     # from events, but doesn't calculate a whole new set of events), you will
+     # get only the first few results, since you should be using the normal
+     # +ResultsReader+, not +MultiResultsReader+, in that case.
+     #
+     def final_results()
+       each do |reader|
+         if reader.is_preview?
+           reader.skip_remaining_results()
+         else
+           return reader
+         end
+       end
+     end
+   end
+
+
+   ##
+   # Stream transformer that filters out XML DTD definitions.
+   #
+   # +XMLDTDFilter+ takes anything between +<?+ and +>+ to be a DTD. It does
+   # no escaping of quoted text.
+   #
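+   # For example (illustrative):
+   #
+   #     stream = StringIO.new("<?xml version=\"1.0\"?><doc/>")
+   #     XMLDTDFilter.new(stream).read() # ==> "<doc/>"
+   #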
+   class XMLDTDFilter < IO
+     def initialize(stream)
+       @stream = stream
+       @peeked_char = nil
+     end
+
+     def close()
+       @stream.close()
+     end
+
+     def read(n=nil)
+       response = ""
+
+       while n.nil? or n > 0
+         # First use any character we have already peeked at.
+         if !@peeked_char.nil?
+           response << @peeked_char
+           @peeked_char = nil
+           if !n.nil?
+             n -= 1
+           end
+           next
+         end
+
+         c = @stream.read(1)
+         if c.nil? # We've reached the end of the stream
+           break
+         elsif c == "<" # We might have a DTD definition
+           d = @stream.read(1) || ""
+           if d == "?" # It's a DTD. Skip until we've consumed a >.
+             while true
+               q = @stream.read(1)
+               if q.nil? or q == ">"
+                 break
+               end
+             end
+           else # It's not a DTD. Push that character into lookahead.
+             @peeked_char = d
+             response << c
+             if !n.nil?
+               n -= 1
+             end
+           end
+         else # No special behavior
+           response << c
+           if !n.nil?
+             n -= 1
+           end
+         end
+       end
+       return response
+     end
+   end
+
+   ##
+   # A stream which concatenates all the streams passed to it.
+   #
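+   # For example (illustrative):
+   #
+   #     stream = ConcatenatedStream.new(StringIO.new("abc"),
+   #                                     StringIO.new("def"))
+   #     stream.read() # ==> "abcdef"
+   #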
+   class ConcatenatedStream < IO
+     def initialize(*streams)
+       @streams = streams
+     end
+
+     def close()
+       @streams.each do |stream|
+         stream.close()
+       end
+     end
+
+     def read(n=nil)
+       response = ""
+       while n.nil? or n > 0
+         if @streams.empty? # No streams left
+           break
+         else # We have streams left.
+           chunk = @streams[0].read(n) || ""
+           if n.nil? or chunk.length() < n
+             @streams.shift()
+           end
+           if !n.nil?
+             n -= chunk.length()
+           end
+
+           response << chunk
+         end
+       end
+       if response == ""
+         return nil
+       else
+         return response
+       end
+     end
+   end
+ end