wonderdog 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/.gitignore +2 -0
  2. data/.idea/encodings.xml +5 -0
  3. data/.idea/misc.xml +5 -0
  4. data/.idea/modules.xml +9 -0
  5. data/.idea/scopes/scope_settings.xml +5 -0
  6. data/.idea/vcs.xml +7 -0
  7. data/.idea/wonderdog.iml +41 -0
  8. data/Gemfile +1 -1
  9. data/bin/estool +22 -1
  10. data/bin/squirrel.rb +108 -0
  11. data/lib/wonderdog.rb +3 -0
  12. data/lib/wonderdog/hadoop_invocation_override.rb +4 -1
  13. data/lib/wonderdog/version.rb +1 -1
  14. data/pom.xml +1 -1
  15. data/spec/spec_helper.rb +1 -1
  16. data/spec/wonderdog/hadoop_invocation_override_spec.rb +1 -1
  17. data/squirrel/all_facets.rb +95 -0
  18. data/squirrel/change_es_index_settings.rb +19 -0
  19. data/squirrel/clear_es_caches.rb +30 -0
  20. data/squirrel/esbackup.rb +184 -0
  21. data/squirrel/esbackup_stripped.rb +153 -0
  22. data/squirrel/fields.sh +5 -0
  23. data/squirrel/getFields.rb +19 -0
  24. data/squirrel/replay.rb +219 -0
  25. data/squirrel/squirrel.rb +95 -0
  26. data/squirrel/warmer_interface.rb +59 -0
  27. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchInputFormat.java +2 -2
  28. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingInputFormat.java +14 -2
  29. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingOutputFormat.java +20 -5
  30. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingRecordReader.java +55 -26
  31. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingRecordWriter.java +59 -22
  32. data/test/cardinality.rb +43 -0
  33. data/test/change_es_index_settings.rb +19 -0
  34. data/test/clear_es_caches.rb +30 -0
  35. data/test/config/mapping.yml +327 -0
  36. data/test/config/mappings.yml +328 -0
  37. data/test/count_check.txt +0 -0
  38. data/test/esbackup_stripped.rb +153 -0
  39. data/test/mapping.yml +327 -0
  40. data/test/medium_slow_queries +41 -0
  41. data/test/queries.txt +0 -0
  42. data/test/quick_test_slow_queries +4 -0
  43. data/test/run_pry.rb +3 -0
  44. data/test/some_slow_queries +53 -0
  45. data/test/warmer_interface.rb +64 -0
  46. data/test/warmindices.rb +65 -0
  47. data/wonderdog.gemspec +1 -1
  48. metadata +40 -7
data/.gitignore CHANGED
@@ -49,3 +49,5 @@ TAGS*
49
49
  a.out
50
50
  doc
51
51
  nohup.out
52
+ test/flight_count_20130405
53
+ test/flight_count_20130405_unpacked
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
4
+ </project>
5
+
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="rbenv: 1.9.3-p392" project-jdk-type="RUBY_SDK" />
4
+ </project>
5
+
@@ -0,0 +1,9 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/wonderdog.iml" filepath="$PROJECT_DIR$/.idea/wonderdog.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
9
+
@@ -0,0 +1,5 @@
1
+ <component name="DependencyValidationManager">
2
+ <state>
3
+ <option name="SKIP_IMPORT_STATEMENTS" value="false" />
4
+ </state>
5
+ </component>
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
7
+
@@ -0,0 +1,41 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="RUBY_MODULE" version="4">
3
+ <component name="FacetManager">
4
+ <facet type="gem" name="Gem">
5
+ <configuration>
6
+ <option name="GEM_APP_ROOT_PATH" value="$MODULE_DIR$" />
7
+ <option name="GEM_APP_TEST_PATH" value="$MODULE_DIR$/test" />
8
+ <option name="GEM_APP_LIB_PATH" value="$MODULE_DIR$/lib" />
9
+ </configuration>
10
+ </facet>
11
+ </component>
12
+ <component name="NewModuleRootManager">
13
+ <content url="file://$MODULE_DIR$">
14
+ <sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" />
15
+ </content>
16
+ <orderEntry type="inheritedJdk" />
17
+ <orderEntry type="sourceFolder" forTests="false" />
18
+ <orderEntry type="library" scope="PROVIDED" name="bundler (v1.3.5, rbenv: 1.9.3-p392) [gem]" level="application" />
19
+ <orderEntry type="library" scope="PROVIDED" name="configliere (v0.4.18, rbenv: 1.9.3-p392) [gem]" level="application" />
20
+ <orderEntry type="library" scope="PROVIDED" name="diff-lcs (v1.2.4, rbenv: 1.9.3-p392) [gem]" level="application" />
21
+ <orderEntry type="library" scope="PROVIDED" name="eventmachine (v1.0.3, rbenv: 1.9.3-p392) [gem]" level="application" />
22
+ <orderEntry type="library" scope="PROVIDED" name="forgery (v0.5.0, rbenv: 1.9.3-p392) [gem]" level="application" />
23
+ <orderEntry type="library" scope="PROVIDED" name="gorillib (v0.5.0, rbenv: 1.9.3-p392) [gem]" level="application" />
24
+ <orderEntry type="library" scope="PROVIDED" name="highline (v1.6.18, rbenv: 1.9.3-p392) [gem]" level="application" />
25
+ <orderEntry type="library" scope="PROVIDED" name="json (v1.7.7, rbenv: 1.9.3-p392) [gem]" level="application" />
26
+ <orderEntry type="library" scope="PROVIDED" name="log4r (v1.1.10, rbenv: 1.9.3-p392) [gem]" level="application" />
27
+ <orderEntry type="library" scope="PROVIDED" name="multi_json (v1.7.2, rbenv: 1.9.3-p392) [gem]" level="application" />
28
+ <orderEntry type="library" scope="PROVIDED" name="rake (v0.9.6, rbenv: 1.9.3-p392) [gem]" level="application" />
29
+ <orderEntry type="library" scope="PROVIDED" name="redcarpet (v2.2.2, rbenv: 1.9.3-p392) [gem]" level="application" />
30
+ <orderEntry type="library" scope="PROVIDED" name="rspec (v2.13.0, rbenv: 1.9.3-p392) [gem]" level="application" />
31
+ <orderEntry type="library" scope="PROVIDED" name="rspec-core (v2.13.1, rbenv: 1.9.3-p392) [gem]" level="application" />
32
+ <orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v2.13.0, rbenv: 1.9.3-p392) [gem]" level="application" />
33
+ <orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v2.13.1, rbenv: 1.9.3-p392) [gem]" level="application" />
34
+ <orderEntry type="library" scope="PROVIDED" name="uuidtools (v2.1.3, rbenv: 1.9.3-p392) [gem]" level="application" />
35
+ <orderEntry type="library" scope="PROVIDED" name="vayacondios-client (v0.1.12, rbenv: 1.9.3-p392) [gem]" level="application" />
36
+ <orderEntry type="library" scope="PROVIDED" name="wukong (v3.0.1, rbenv: 1.9.3-p392) [gem]" level="application" />
37
+ <orderEntry type="library" scope="PROVIDED" name="wukong-hadoop (v0.1.1, rbenv: 1.9.3-p392) [gem]" level="application" />
38
+ <orderEntry type="library" scope="PROVIDED" name="yard (v0.8.6.1, rbenv: 1.9.3-p392) [gem]" level="application" />
39
+ </component>
40
+ </module>
41
+
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source :rubygems
1
+ source 'https://rubygems.org'
2
2
 
3
3
  gemspec
4
4
 
data/bin/estool CHANGED
@@ -23,6 +23,9 @@ Commands include:
23
23
  optimize Optimizes the specified INDEX to (-s) number of segments
24
24
  snapshot Snapshots the specified INDEX to the gateway
25
25
  segments Returns the segment information. Requires ElasticSearch v
26
+ aliases Returns a list of Index/Alias pairs
27
+ ialiases Returns a list of Alias/Index pairs
28
+ count The number of documents in an index
26
29
  mapping
27
30
  set_replication
28
31
  search
@@ -89,7 +92,7 @@ class ESTool
89
92
 
90
93
  def status() shell_response(File.join(options.index, "_status?")) ; end
91
94
 
92
- def list() status["indices"].keys ; end
95
+ def list() status["indices"].keys.sort ; end
93
96
 
94
97
  def health() shell_response("_cluster/health?") ; end
95
98
 
@@ -111,6 +114,24 @@ class ESTool
111
114
  def segments() shell_response(File.join(options.index, "_segments")) ; end
112
115
 
113
116
  def mapping() shell_response(File.join(options.index, "_mapping")) ; end
117
+
118
# Builds a hash of index name => array of alias names from the `_aliases?`
# response, visiting the indices in sorted order.
def aliases
  shell_response('_aliases?').sort.each_with_object({}) do |(index, info), by_index|
    by_index[index] = info['aliases'].keys
  end
end
119
+
120
# Inverse of #aliases: a hash of alias name => array of index names that carry
# that alias. The returned hash auto-creates an empty array for unknown keys.
def ialiases
  by_alias = Hash.new { |hsh, key| hsh[key] = [] }
  aliases.each_pair do |index_name, alias_names|
    alias_names.each { |alias_name| by_alias[alias_name] << index_name }
  end
  by_alias
end
125
+
126
# Document counts as a hash of index name => count.
#
# For a single index the `_count` endpoint is queried. For `_all` the counts
# are read from the status response, whose per-index "docs" entry looks like:
#
#   estool status -r | ruby -rjson -e 'puts JSON.parse($stdin.read)["indices"]["item_count_legacy"]["docs"]'
#   {"num_docs"=>187749, "max_doc"=>187749, "deleted_docs"=>0}
def count
  unless options.index == '_all'
    return { options.index => shell_response(File.join(options.index, '_count'))['count'] }
  end

  status['indices'].each_with_object({}) do |(index, info), totals|
    totals[index] = info['docs']['num_docs']
  end
end
114
135
 
115
136
  # curl -s -XPUT http://host:port/index/_settings -d '{"index":{"number_of_replicas":num}}'
116
137
  def set_replication() { "error" => "method not yet implemented" }; end
@@ -0,0 +1,108 @@
1
#! /usr/bin/ruby

# Command-line front end for Squirrel.
#
# Declares one configliere command per task (backup, restore, duplicate,
# cardinality, warmer, cache, replay, index_settings), resolves the command
# line, then hands the chosen command name and the settings hash to
# Squirrel#task_caller.

require "configliere"
require_relative "../squirrel/squirrel.rb"

# Help text shown as the Settings description.
# NOTE(review): the duplicate example below passes --duplicate_mapping while the
# :duplicate command defines :duplicated_mapping -- confirm which name Squirrel
# actually reads before renaming either one.
doc = <<DOC
This is the uber script: the arguments you give it decide what happens
squirrel => Standard Query Ultracrepidate Iamatology Ruby Resource for Elasticsearch Labarum ##
example commands:
clear all caches
ruby squirrel.rb cache --host=localhost --port=9200 --clear_all_cache=true
run slow log queries
ruby squirrel.rb replay --host=localhost --port=9200 --preference=true,_primary_first --execute_slow_queries=/var/log/elasticsearch/padraig.log
back up an index, i.e. generate a dumpfile
ruby squirrel.rb backup --host=localhost --port=9200 --output_dir="." --dump_index=flight_count_20130405 --batch_size=100 --dump_mapping=flight_count_20130405_mapping.json
get the cardinality of a dumpfile(card_file)
ruby squirrel.rb cardinality --host=localhost --port=9200 --output_dir="." --card_file=flight_count_20130405 --cardinality=cnt,metric
restore an index from a dumpfile
ruby squirrel.rb restore --host=localhost --port=9200 --output_dir="." --restore_file=flight_count_20130405.gz --restore_index=flight_count_20130405 --restore_mapping=flight_count_20130405_mapping.json --batch_size=100
duplicate files in an index from a dumpfile(duplicate_file)
ruby squirrel.rb duplicate --host=localhost --port=9200 --output_dir="." --duplicate_file=flight_count_20130405.gz --duplicate_index=eight_flight_count_20130405 --duplicate_mapping=flight_count_20130405_mapping.json --batch_size=100
add warmer
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --new_warmers_name=polite_warmer --warmers_index=flight_count_20130408 --create_warmer='{"sort" : ["_state", "flight_id","metric", "tb_h", "feature", "seconds", "base_feature", "metric_feature", "cnt", "_score"],"query":{"match_all":{}}}'
remove warmer
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --remove_warmer=polite_warmer --warmers_index=flight_count_20130408
disable warmers
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --warmers=false --warmers_index=flight_count_20130405
enable warmers
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --warmers=true --warmers_index=flight_count_20130405
remove warmer
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --remove_warmer=polite_warmer --warmers_index=flight_count_20130405
change index settings
ruby squirrel.rb index_settings --host=localhost --port=9200 --output_dir="." --settings_index=flight_count_20130405 --es_index_settings=refresh_interval,refresh_interval --es_index_settings_values=-1,0
DOC

Settings.use :commandline
Settings.use :commands
Settings.description = doc

# backup: dump an index to a gzip file.
Settings.define_command :backup, :description => "Create a dump gzip file of an index" do |cmd|
  cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
  cmd.define :dump_file, :default => nil, :description => 'The name of the dumpfile to use, default is nil'
  cmd.define :dump_index, :default => nil, :description => 'Index to use, default is nil'
  cmd.define :query, :default => nil, :description => 'Query to use in order to limit the data extracted from the index, default nil'
  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
  cmd.define :dump_mapping, :default => nil, :description => 'The file to put the json mapping in, defaults to nil'
  cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
end

# restore: populate an index from a dump file.
Settings.define_command :restore, :description => "Take the data from a dump gzip file and use it to populate an index" do |cmd|
  cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
  cmd.define :restore_file, :default => nil, :description => 'The name of the dumpfile to use, default is nil'
  cmd.define :restore_index, :default => nil, :description => 'Index to use, default is nil'
  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
  cmd.define :restore_mapping, :default => nil, :description => 'The mapping file to use when restoring an index, defaults to nil'
  cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
end

# duplicate: add dump-file documents to an index, allowing duplicates.
Settings.define_command :duplicate, :description => "Take the data from a dump gzip file and add it into an index allowing documents to be duplicated" do |cmd|
  cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
  cmd.define :duplicate_file, :default => nil, :description => 'The name of the dumpfile to use, default is nil'
  cmd.define :duplicate_index, :default => nil, :description => 'Index to use, default is nil'
  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
  cmd.define :duplicated_mapping, :default => nil, :description => 'The mapping file to use when restoring an index, defaults to nil'
  cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
end

# cardinality: count unique values per field in an uncompressed dump file.
Settings.define_command :cardinality, :description => "Count the number of unique values for the given field(s)" do |cmd|
  cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
  cmd.define :cardinality, :type => Array, :default => nil, :description => 'Return the cardinality of the given fields, defaults to nil'
  cmd.define :card_file, :default => nil, :description => 'The dump file to grab info from when determining cardinality MUST NOT be compressed, defaults to nil'
end

# warmer: create, remove, enable or disable warmers on an index.
Settings.define_command :warmer, :description => "Interact with elasticsearch warmers" do |cmd|
  cmd.define :warmers, :default => nil, :description => 'Use warmers expected values true/false, defaults to nil'
  cmd.define :warmers_index, :default => nil, :description => 'The index to add the warmer to, remove it from or disable/enable it on, defaults to nil'
  cmd.define :new_warmers_name, :default => nil, :description => 'Name of warmer to create, defaults to nil'
  cmd.define :create_warmer, :default => nil, :description => 'Query to create warmer, defaults to nil'
  cmd.define :remove_warmer, :default => nil, :description => 'Name of warmer to remove, defaults to nil'
  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
end

# cache: clear elasticsearch caches.
Settings.define_command :cache, :description => "Interact with elasticsearch caches" do |cmd|
  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
  cmd.define :clear_all_cache, :type => :boolean, :default => nil, :description => 'Clear all caches expected true/false, defaults to nil'
  cmd.define :clear_fielddata, :type => :boolean, :default => nil, :description => 'Clear fielddata cache expected true/false, defaults to nil'
  cmd.define :clear_filter_cache, :type => :boolean, :default => nil, :description => 'Clear filter cache expected true/false, defaults to nil'
end

# replay: re-run queries taken from a slow log.
Settings.define_command :replay, :description => "Replay slow log queries" do |cmd|
  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
  cmd.define :execute_slow_queries, :default => nil, :description => 'Execute the slow log queries in the provided log file, ie --execute_slow_queries=/var/log/elasticsearch/padraig.log, defaults to nil'
  cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
  cmd.define :preference, :type => Array, :default => [true,"_primary_first"], :description => 'Turn on/off preference and set it,ie --preference=true,_primary_first'
  cmd.define :routing, :type => Array, :default => [false, ""], :description => 'Add routing, ie --routing=true,flight_id'
end

# index_settings: change settings on one index.
Settings.define_command :index_settings, :description => "Change the index settings" do |cmd|
  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
  cmd.define :settings_index, :default => nil, :description => 'The index that the settings listed in index_settings will be changed for, defaults to nil'
  cmd.define :es_index_settings, :type => Array, :default => nil, :description => 'A comma delimited list of elasticsearch index settings to be set for --settings_index, defaults to nil'
  cmd.define :es_index_settings_values, :type => Array, :default => nil, :description => 'A comma delimited list of elasticsearch index settings values to be set for --settings_index, defaults to nil'
end
Settings.resolve!


# Hand the selected command and the resolved settings to Squirrel.
Squirrel.new(Settings.command_name, Settings.to_hash).task_caller
107
+
108
+
@@ -26,6 +26,9 @@ module Wukong
26
26
  settings.define(:es_id_field, :description => "If this field is present in a record, make an update request, otherwise make a create request", :wukong_hadoop => true)
27
27
  settings.define(:es_bulk_size, :description => "Number of requests to batch locally before making a request to ElasticSearch", :type => Integer, :wukong_hadoop => true)
28
28
  settings.define(:es_query, :description => "Query to use when defining input splits for ElasticSearch input", :wukong_hadoop => true)
29
+ settings.define(:es_transport, :description => "Use a transport client to an existing node instead of spinning up a new node", :default => true, type: :boolean, :wukong_hadoop => true)
30
+ settings.define(:es_transport_host, :description => "Host of existing node for transport client", default: 'localhost', :wukong_hadoop => true)
31
+ settings.define(:es_transport_port, :description => "Port of existing node for transport client", default: 9300, type: Integer, :wukong_hadoop => true)
29
32
  end
30
33
 
31
34
  # Boot Wonderdog with the given `settings` in the given `dir`.
@@ -105,7 +105,10 @@ module Wukong
105
105
 
106
106
  super() + [].tap do |o|
107
107
  if (reads_from_elasticsearch? || writes_to_elasticsearch?)
108
- o << java_opt('es.config', settings[:es_config])
108
+ o << java_opt('es.config', settings[:es_config])
109
+ o << java_opt('elasticsearch.transport', settings[:es_transport])
110
+ o << java_opt('elasticsearch.transport.host', settings[:es_transport_host]) if settings[:es_transport] && settings[:es_transport_host]
111
+ o << java_opt('elasticsearch.transport.port', settings[:es_transport_port]) if settings[:es_transport] && settings[:es_transport_port]
109
112
  end
110
113
 
111
114
  if reads_from_elasticsearch?
@@ -1,3 +1,3 @@
1
1
  module Wonderdog
2
- VERSION = '0.1.1'
2
+ VERSION = '0.2.0'
3
3
  end
data/pom.xml CHANGED
@@ -18,7 +18,7 @@
18
18
  <dependency>
19
19
  <groupId>org.elasticsearch</groupId>
20
20
  <artifactId>elasticsearch</artifactId>
21
- <version>0.19.8</version>
21
+ <version>0.90.5</version>
22
22
  </dependency>
23
23
 
24
24
  <dependency>
@@ -21,7 +21,7 @@ RSpec.configure do |config|
21
21
 
22
22
  def hadoop_runner *args, &block
23
23
  runner(Wukong::Hadoop::HadoopRunner, 'wu-hadoop', *args) do
24
- stub!(:execute_command!)
24
+ stub(:execute_command!)
25
25
  instance_eval(&block) if block_given?
26
26
  end
27
27
  end
@@ -8,7 +8,7 @@ describe Wukong::Elasticsearch::HadoopInvocationOverride do
8
8
  let(:es_complex) { hadoop_runner('regexp', 'count', input: 'es://the_index/the_map', output: 'es:///the_index/the_map', es_query: '{"hi": "there"}', es_request_size: 1000, es_index_field: 'ID', map_speculative: true, reduce_speculative: true) }
9
9
 
10
10
  context "passing necessary jars to Hadoop streaming" do
11
- before { Dir.stub!(:[]).and_return(["/lib/dir/elasticsearch.jar"], ["/lib/dir/wonderdog.jar"]) }
11
+ before { Dir.stub(:[]).and_return(["/lib/dir/elasticsearch.jar"], ["/lib/dir/wonderdog.jar"]) }
12
12
  context "when not given explicit jars" do
13
13
  context "and not interacting with Elasticsearch" do
14
14
  it "doesn't add jars" do
@@ -0,0 +1,95 @@
1
require 'configliere'
require 'json'

# Diagnostic script: reads the mapping of one index/type from the
# Elasticsearch node at localhost:9200, builds a match_all search carrying a
# date_histogram facet, a terms facet and one terms_stats facet per mapped
# field, runs it through curl, prints the response and reports how many
# distinct `"term" : "..."` lines the pretty-printed response contains.

Settings.use :commandline
Settings.use :config_block
Settings.define :es_index, default: nil
Settings.define :fields, default: nil
Settings.define :one_index, default: false, flag: 'o'
Settings.define :composite_key, default: nil
Settings.define :es_type
Settings.finally do |settings|
  # Without an explicit index: "<es_type>cnt" when --one_index is set,
  # otherwise the shared 'ad_activity' index.
  settings.es_index ||= settings.one_index ? [settings.es_type, "cnt"].join : 'ad_activity'
end
Settings.resolve!

puts "using es index #{Settings.es_index} and type #{Settings.es_type}"

index = Settings.es_index
type = Settings.es_type

# Fetch the mapping and parse it once; the field list is every mapped property.
response = `curl localhost:9200/#{Settings.es_index}/_mapping/`
type_mapping = JSON.parse(response)[index.to_s][type.to_s]
puts type_mapping
fields = type_mapping["properties"].keys

puts "got fields #{Settings.fields || fields}"

# Default composite key per type:
#   site_count      => site_composite
#   placement_count => pl_composite
#   flight_count    => metric_feature
composite_key = case Settings.es_type
                when "site_count" then "site_composite"
                when "placement_count" then "pl_composite"
                when "flight_count" then "metric_feature"
                end

doc = {
  "query" => {
    "match_all" => {}
  },
  "facets" => {
    # "ignz" => {
    #   "statistical" => {
    #     "field" => "cnt"
    #   }
    # },
    "igna" => {
      "date_histogram" => {
        "field" => "tb_h",
        "interval" => "day"
      }
    },
    "ignb" => {
      "terms" => {
        "fields" => [Settings.fields || fields].flatten
      }
    }
  }
}

# One terms_stats facet per mapped field, always summing the "cnt" field.
fields.each do |field|
  doc["facets"]["ign_#{field}"] = {
    "terms_stats" => {
      "key_field" => field,
      "value_field" => "cnt"
    }
  }
end

# NOTE(review): --composite_key is only honoured when es_type is one of the
# three types above; confirm whether it should also apply to other types.
unless composite_key.nil?
  doc["facets"]["ignc"] = {
    "terms_stats" => {
      "key_field" => Settings.composite_key || composite_key,
      "value_field" => "cnt"
    }
  }
end

puts "about to query with " + doc.to_s

search_url = "localhost:9200/#{Settings.es_index}/#{Settings.es_type}/_search/"
query_json = JSON.generate(doc)

# Run the search once and reuse the response for both counting and printing.
results = `curl #{search_url}?pretty=true -d '#{query_json}'`
term_lines = results.split(/\r?\n/).select { |line| line.include?("\"term\" : \"") }

puts results
puts "from command: \n curl #{search_url} -d '#{query_json}'"
puts "number of terms: #{term_lines.uniq.size}"
@@ -0,0 +1,19 @@
1
require 'json'
require 'shellwords'

# Changes settings on one Elasticsearch index by PUTting to its `_settings`
# endpoint through curl.
class ChangeESIndexSettings
  # @param options [Hash] reads :host, :port, :index and :settings_and_values
  #   (an enumerable of setting/value pairs, e.g. a Hash)
  def initialize(options = {})
    @host = options[:host]
    @port = options[:port]
    @index = options[:index]
    @settings_and_values = options[:settings_and_values]
  end

  # Sends a single setting to the index.
  #
  # The body is produced by JSON.generate and both the URL and the body are
  # shell-escaped, so quotes or shell metacharacters in a setting name or value
  # cannot corrupt the JSON or the command line. As before, the value is sent
  # as a JSON string.
  #
  # @param setting [#to_s] setting name, e.g. "refresh_interval"
  # @param value [#to_s] value to assign
  # @return [String] whatever curl printed
  def change_setting(setting, value)
    puts "changing setting #{setting} to value #{value}"
    url = "http://#{@host}:#{@port}/#{@index}/_settings?pretty=true"
    body = JSON.generate({ setting.to_s => value.to_s })
    `curl -s -XPUT #{Shellwords.escape(url)} -d #{Shellwords.escape(body)}`
  end

  # Applies every configured setting/value pair in order; does nothing when no
  # pairs were supplied.
  def run
    (@settings_and_values || {}).each do |setting, value|
      change_setting(setting, value)
    end
  end
end
@@ -0,0 +1,30 @@
1
# Clears Elasticsearch caches on every index (`_all`) by POSTing to the
# `_cache/clear` endpoint through curl.
class ClearESCaches
  # @param options [Hash] reads :type (all, filter or fielddata -- String or
  #   Symbol), :host and :port
  def initialize(options={})
    @to_clear = options[:type]
    @host = options[:host]
    @port = options[:port]
  end

  # Clears both the field data and the filter cache.
  def clear_all
    clear_cache(true, true)
  end

  # Clears the filter cache only.
  def clear_filter_cache
    clear_cache(false, true)
  end

  # Clears the field data cache only.
  def clear_fielddata
    clear_cache(true, false)
  end

  # Prints the requested cache type and runs the matching clear method.
  # Aborts with "<type> not recognized" for an unknown type; a missing type
  # now takes the same path instead of raising NoMethodError on nil.
  def run
    puts @to_clear
    case command = @to_clear.to_s.to_sym
    when :all then clear_all
    when :filter then clear_filter_cache
    when :fielddata then clear_fielddata
    else abort "#{command} not recognized"
    end
  end

  private

  # Issues the clear request; bloom=true is always sent. The trailing `echo`
  # ends curl's output with a newline.
  def clear_cache(field_data, filter)
    `curl -s -XPOST 'http://#{@host}:#{@port}/_all/_cache/clear?field_data=#{field_data}&filter=#{filter}&bloom=true' ; echo`
  end
end