wonderdog 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/.gitignore +2 -0
  2. data/.idea/encodings.xml +5 -0
  3. data/.idea/misc.xml +5 -0
  4. data/.idea/modules.xml +9 -0
  5. data/.idea/scopes/scope_settings.xml +5 -0
  6. data/.idea/vcs.xml +7 -0
  7. data/.idea/wonderdog.iml +41 -0
  8. data/Gemfile +1 -1
  9. data/bin/estool +22 -1
  10. data/bin/squirrel.rb +108 -0
  11. data/lib/wonderdog.rb +3 -0
  12. data/lib/wonderdog/hadoop_invocation_override.rb +4 -1
  13. data/lib/wonderdog/version.rb +1 -1
  14. data/pom.xml +1 -1
  15. data/spec/spec_helper.rb +1 -1
  16. data/spec/wonderdog/hadoop_invocation_override_spec.rb +1 -1
  17. data/squirrel/all_facets.rb +95 -0
  18. data/squirrel/change_es_index_settings.rb +19 -0
  19. data/squirrel/clear_es_caches.rb +30 -0
  20. data/squirrel/esbackup.rb +184 -0
  21. data/squirrel/esbackup_stripped.rb +153 -0
  22. data/squirrel/fields.sh +5 -0
  23. data/squirrel/getFields.rb +19 -0
  24. data/squirrel/replay.rb +219 -0
  25. data/squirrel/squirrel.rb +95 -0
  26. data/squirrel/warmer_interface.rb +59 -0
  27. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchInputFormat.java +2 -2
  28. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingInputFormat.java +14 -2
  29. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingOutputFormat.java +20 -5
  30. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingRecordReader.java +55 -26
  31. data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingRecordWriter.java +59 -22
  32. data/test/cardinality.rb +43 -0
  33. data/test/change_es_index_settings.rb +19 -0
  34. data/test/clear_es_caches.rb +30 -0
  35. data/test/config/mapping.yml +327 -0
  36. data/test/config/mappings.yml +328 -0
  37. data/test/count_check.txt +0 -0
  38. data/test/esbackup_stripped.rb +153 -0
  39. data/test/mapping.yml +327 -0
  40. data/test/medium_slow_queries +41 -0
  41. data/test/queries.txt +0 -0
  42. data/test/quick_test_slow_queries +4 -0
  43. data/test/run_pry.rb +3 -0
  44. data/test/some_slow_queries +53 -0
  45. data/test/warmer_interface.rb +64 -0
  46. data/test/warmindices.rb +65 -0
  47. data/wonderdog.gemspec +1 -1
  48. metadata +40 -7
data/.gitignore CHANGED
@@ -49,3 +49,5 @@ TAGS*
49
49
  a.out
50
50
  doc
51
51
  nohup.out
52
+ test/flight_count_20130405
53
+ test/flight_count_20130405_unpacked
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
4
+ </project>
5
+
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="rbenv: 1.9.3-p392" project-jdk-type="RUBY_SDK" />
4
+ </project>
5
+
@@ -0,0 +1,9 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/wonderdog.iml" filepath="$PROJECT_DIR$/.idea/wonderdog.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
9
+
@@ -0,0 +1,5 @@
1
+ <component name="DependencyValidationManager">
2
+ <state>
3
+ <option name="SKIP_IMPORT_STATEMENTS" value="false" />
4
+ </state>
5
+ </component>
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
7
+
@@ -0,0 +1,41 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="RUBY_MODULE" version="4">
3
+ <component name="FacetManager">
4
+ <facet type="gem" name="Gem">
5
+ <configuration>
6
+ <option name="GEM_APP_ROOT_PATH" value="$MODULE_DIR$" />
7
+ <option name="GEM_APP_TEST_PATH" value="$MODULE_DIR$/test" />
8
+ <option name="GEM_APP_LIB_PATH" value="$MODULE_DIR$/lib" />
9
+ </configuration>
10
+ </facet>
11
+ </component>
12
+ <component name="NewModuleRootManager">
13
+ <content url="file://$MODULE_DIR$">
14
+ <sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" />
15
+ </content>
16
+ <orderEntry type="inheritedJdk" />
17
+ <orderEntry type="sourceFolder" forTests="false" />
18
+ <orderEntry type="library" scope="PROVIDED" name="bundler (v1.3.5, rbenv: 1.9.3-p392) [gem]" level="application" />
19
+ <orderEntry type="library" scope="PROVIDED" name="configliere (v0.4.18, rbenv: 1.9.3-p392) [gem]" level="application" />
20
+ <orderEntry type="library" scope="PROVIDED" name="diff-lcs (v1.2.4, rbenv: 1.9.3-p392) [gem]" level="application" />
21
+ <orderEntry type="library" scope="PROVIDED" name="eventmachine (v1.0.3, rbenv: 1.9.3-p392) [gem]" level="application" />
22
+ <orderEntry type="library" scope="PROVIDED" name="forgery (v0.5.0, rbenv: 1.9.3-p392) [gem]" level="application" />
23
+ <orderEntry type="library" scope="PROVIDED" name="gorillib (v0.5.0, rbenv: 1.9.3-p392) [gem]" level="application" />
24
+ <orderEntry type="library" scope="PROVIDED" name="highline (v1.6.18, rbenv: 1.9.3-p392) [gem]" level="application" />
25
+ <orderEntry type="library" scope="PROVIDED" name="json (v1.7.7, rbenv: 1.9.3-p392) [gem]" level="application" />
26
+ <orderEntry type="library" scope="PROVIDED" name="log4r (v1.1.10, rbenv: 1.9.3-p392) [gem]" level="application" />
27
+ <orderEntry type="library" scope="PROVIDED" name="multi_json (v1.7.2, rbenv: 1.9.3-p392) [gem]" level="application" />
28
+ <orderEntry type="library" scope="PROVIDED" name="rake (v0.9.6, rbenv: 1.9.3-p392) [gem]" level="application" />
29
+ <orderEntry type="library" scope="PROVIDED" name="redcarpet (v2.2.2, rbenv: 1.9.3-p392) [gem]" level="application" />
30
+ <orderEntry type="library" scope="PROVIDED" name="rspec (v2.13.0, rbenv: 1.9.3-p392) [gem]" level="application" />
31
+ <orderEntry type="library" scope="PROVIDED" name="rspec-core (v2.13.1, rbenv: 1.9.3-p392) [gem]" level="application" />
32
+ <orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v2.13.0, rbenv: 1.9.3-p392) [gem]" level="application" />
33
+ <orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v2.13.1, rbenv: 1.9.3-p392) [gem]" level="application" />
34
+ <orderEntry type="library" scope="PROVIDED" name="uuidtools (v2.1.3, rbenv: 1.9.3-p392) [gem]" level="application" />
35
+ <orderEntry type="library" scope="PROVIDED" name="vayacondios-client (v0.1.12, rbenv: 1.9.3-p392) [gem]" level="application" />
36
+ <orderEntry type="library" scope="PROVIDED" name="wukong (v3.0.1, rbenv: 1.9.3-p392) [gem]" level="application" />
37
+ <orderEntry type="library" scope="PROVIDED" name="wukong-hadoop (v0.1.1, rbenv: 1.9.3-p392) [gem]" level="application" />
38
+ <orderEntry type="library" scope="PROVIDED" name="yard (v0.8.6.1, rbenv: 1.9.3-p392) [gem]" level="application" />
39
+ </component>
40
+ </module>
41
+
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source :rubygems
1
+ source 'https://rubygems.org'
2
2
 
3
3
  gemspec
4
4
 
data/bin/estool CHANGED
@@ -23,6 +23,9 @@ Commands include:
23
23
  optimize Optimizes the specified INDEX to (-s) number of segments
24
24
  snapshot Snapshots the specified INDEX to the gateway
25
25
  segments Returns the segment information. Requires ElasticSearch v
26
+ aliases Returns a list of Index/Alias pairs
27
+ ialiases Returns a list of Alias/Index pairs
28
+ count The number of documents in an index
26
29
  mapping
27
30
  set_replication
28
31
  search
@@ -89,7 +92,7 @@ class ESTool
89
92
 
90
93
  def status() shell_response(File.join(options.index, "_status?")) ; end
91
94
 
92
- def list() status["indices"].keys ; end
95
+ def list() status["indices"].keys.sort ; end
93
96
 
94
97
  def health() shell_response("_cluster/health?") ; end
95
98
 
@@ -111,6 +114,24 @@ class ESTool
111
114
  def segments() shell_response(File.join(options.index, "_segments")) ; end
112
115
 
113
116
  def mapping() shell_response(File.join(options.index, "_mapping")) ; end
117
+
118
+ def aliases() shell_response('_aliases?').sort.inject({}){ |hsh, (index, info)| hsh[index] = info['aliases'].keys ; hsh } ; end
119
+
120
+ def ialiases()
121
+ inverse = Hash.new{ |hsh, key| hsh[key] = [] }
122
+ aliases.each{ |idx, als| als.each{ |a| inverse[a] << idx } }
123
+ inverse
124
+ end
125
+
126
+ # estool status -r | ruby -rjson -e 'puts JSON.parse($stdin.read)["indices"]["item_count_legacy"]["docs"]'
127
+ # {"num_docs"=>187749, "max_doc"=>187749, "deleted_docs"=>0}
128
+ def count()
129
+ if options.index == '_all'
130
+ status['indices'].inject({}){ |hsh, (index, info)| hsh[index] = info['docs']['num_docs'] ; hsh }
131
+ else
132
+ { options.index => shell_response(File.join(options.index, '_count'))['count'] }
133
+ end
134
+ end
114
135
 
115
136
  # curl -s -XPUT http://host:port/index/_settings -d '{"index":{"number_of_replicas":num}}'
116
137
  def set_replication() { "error" => "method not yet implemented" }; end
@@ -0,0 +1,108 @@
1
+ #! /usr/bin/ruby
2
+
3
+ require "configliere"
4
+ require_relative "../squirrel/squirrel.rb"
5
+
6
+ doc = <<DOC
7
+ This is the uber script; the arguments you give it decide what happens
8
+ squirrel => Standard Query Ultracrepidate Iamatology Ruby Resource for Elasticsearch Labarum ##
9
+ example commands:
10
+ clear all caches
11
+ ruby squirrel.rb cache --host=localhost --port=9200 --clear_all_cache=true
12
+ run slow log queries
13
+ ruby squirrel.rb replay --host=localhost --port=9200 --preference=true,_primary_first --execute_slow_queries=/var/log/elasticsearch/padraig.log
14
+ back up an index, aka generate a dumpfile
15
+ ruby squirrel.rb backup --host=localhost --port=9200 --output_dir="." --dump_index=flight_count_20130405 --batch_size=100 --dump_mapping=flight_count_20130405_mapping.json
16
+ get the cardinality of a dumpfile(card_file)
17
+ ruby squirrel.rb cardinality --host=localhost --port=9200 --output_dir="." --card_file=flight_count_20130405 --cardinality=cnt,metric
18
+ restore an index from a dumpfile
19
+ ruby squirrel.rb restore --host=localhost --port=9200 --output_dir="." --restore_file=flight_count_20130405.gz --restore_index=flight_count_20130405 --restore_mapping=flight_count_20130405_mapping.json --batch_size=100
20
+ duplicate files in an index from a dumpfile(duplicate_file)
21
+ ruby squirrel.rb duplicate --host=localhost --port=9200 --output_dir="." --duplicate_file=flight_count_20130405.gz --duplicate_index=eight_flight_count_20130405 --duplicate_mapping=flight_count_20130405_mapping.json --batch_size=100
22
+ add warmer
23
+ ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --new_warmers_name=polite_warmer --warmers_index=flight_count_20130408 --create_warmer='{"sort" : ["_state", "flight_id","metric", "tb_h", "feature", "seconds", "base_feature", "metric_feature", "cnt", "_score"],"query":{"match_all":{}}}'
24
+ remove warmer
25
+ ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --remove_warmer=polite_warmer --warmers_index=flight_count_20130408
26
+ disable warmers
27
+ ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --warmers=false --warmers_index=flight_count_20130405
28
+ enable warmers
29
+ ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --warmers=true --warmers_index=flight_count_20130405
30
+ remove warmer
31
+ ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --remove_warmer=polite_warmer --warmers_index=flight_count_20130405
32
+ change index settings
33
+ ruby squirrel.rb index_settings --host=localhost --port=9200 --output_dir="." --settings_index=flight_count_20130405 --es_index_settings=refresh_interval,refresh_interval --es_index_settings_values=-1,0
34
+ DOC
35
+
36
+ Settings.use :commandline
37
+ Settings.use :commands
38
+ Settings.description = doc
39
+ Settings.define_command :backup, :description => "Create a dump gzip file of an index" do |cmd|
40
+ cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
41
+ cmd.define :dump_file, :default => nil, :description => 'The name of the dumpfile to use, default is nil'
42
+ cmd.define :dump_index, :default => nil, :description => 'Index to use, default is nil'
43
+ cmd.define :query, :default => nil, :description => 'Query to use in order to limit the data extracted from the index, default nil'
44
+ cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
45
+ cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
46
+ cmd.define :dump_mapping, :default => nil, :description => 'The file to put the json mapping in, defaults to nil'
47
+ cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
48
+ end
49
+ Settings.define_command :restore, :description => "Take the data from a dump gzip file and use it to populate an index" do |cmd|
50
+ cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
51
+ cmd.define :restore_file, :default => nil, :description => 'The name of the dumpfile to use, default is nil'
52
+ cmd.define :restore_index, :default => nil, :description => 'Index to use, default is nil'
53
+ cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
54
+ cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
55
+ cmd.define :restore_mapping, :default => nil, :description => 'The mapping file to use when restoring an index, defaults to nil'
56
+ cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
57
+ end
58
+ Settings.define_command :duplicate, :description => "Take the data from a dump gzip file and add it into an index allowing documents to be duplicated" do |cmd|
59
+ cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
60
+ cmd.define :duplicate_file, :default => nil, :description => 'The name of the dumpfile to use, default is nil'
61
+ cmd.define :duplicate_index, :default => nil, :description => 'Index to use, default is nil'
62
+ cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
63
+ cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
64
+ cmd.define :duplicated_mapping, :default => nil, :description => 'The mapping file to use when restoring an index, defaults to nil'
65
+ cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
66
+ end
67
+ Settings.define_command :cardinality, :description => "Count the number of unique values for the given field(s)" do |cmd|
68
+ cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
69
+ cmd.define :cardinality, :type => Array, :default => nil, :description => 'Return the cardinality of the given fields, defaults to nil'
70
+ cmd.define :card_file, :default => nil, :description => 'The dump file to grab info from when determining cardinality MUST NOT be compressed, defaults to nil'
71
+ end
72
+ Settings.define_command :warmer, :description => "Interact with elasticsearch warmers" do |cmd|
73
+ cmd.define :warmers, :default => nil, :description => 'Use warmers expected values true/false, defaults to nil'
74
+ cmd.define :warmers_index, :default => nil, :description => 'The index to add the warmer to, remove it from or disable/enable it on, defaults to nil'
75
+ cmd.define :new_warmers_name, :default => nil, :description => 'Name of warmer to create, defaults to nil'
76
+ cmd.define :create_warmer, :default => nil, :description => 'Query to create warmer, defaults to nil'
77
+ cmd.define :remove_warmer, :default => nil, :description => 'Name of warmer to remove, defaults to nil'
78
+ cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
79
+ cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
80
+ end
81
+ Settings.define_command :cache, :description => "Interact with elasticsearch caches" do |cmd|
82
+ cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
83
+ cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
84
+ cmd.define :clear_all_cache, :type => :boolean, :default => nil, :description => 'Clear all caches expected true/false, defaults to nil'
85
+ cmd.define :clear_fielddata, :type => :boolean, :default => nil, :description => 'Clear fielddata cache expected true/false, defaults to nil'
86
+ cmd.define :clear_filter_cache, :type => :boolean, :default => nil, :description => 'Clear filter cache expected true/false, defaults to nil'
87
+ end
88
+ Settings.define_command :replay, :description => "Replay slow log queries" do |cmd|
89
+ cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
90
+ cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
91
+ cmd.define :execute_slow_queries, :default => nil, :description => 'Execute the slow log queries in the provided log file,ie --execute_slow_queries=/var/log/elasticsearch/padraig.log, defaults to nil'
92
+ cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
93
+ cmd.define :preference, :type => Array, :default => [true,"_primary_first"], :description => 'Turn on/off preference and set it,ie --preference=true,_primary_first'
94
+ cmd.define :routing, :type => Array, :default => [false, ""], :description => 'Add routing, ie --routing=true,flight_id'
95
+ end
96
+ Settings.define_command :index_settings, :description => "Change the index settings" do |cmd|
97
+ cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
98
+ cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
99
+ cmd.define :settings_index, :default => nil, :description => 'The index that the settings listed in index_settings will be changed for, defaults to nil'
100
+ cmd.define :es_index_settings, :type => Array, :default => nil, :description => 'A comma delimited list of elasticsearch index settings to be set for --settings_index, defaults to []'
101
+ cmd.define :es_index_settings_values, :type => Array, :default => nil, :description => 'A comma delimited list of elasticsearch index settings values to be set for --settings_index, defaults to []'
102
+ end
103
+ Settings.resolve!
104
+
105
+
106
+ Squirrel.new(Settings.command_name, Settings.to_hash).task_caller
107
+
108
+
@@ -26,6 +26,9 @@ module Wukong
26
26
  settings.define(:es_id_field, :description => "If this field is present in a record, make an update request, otherwise make a create request", :wukong_hadoop => true)
27
27
  settings.define(:es_bulk_size, :description => "Number of requests to batch locally before making a request to ElasticSearch", :type => Integer, :wukong_hadoop => true)
28
28
  settings.define(:es_query, :description => "Query to use when defining input splits for ElasticSearch input", :wukong_hadoop => true)
29
+ settings.define(:es_transport, :description => "Use a transport client to an existing node instead of spinning up a new node", :default => true, type: :boolean, :wukong_hadoop => true)
30
+ settings.define(:es_transport_host, :description => "Host of existing node for transport client", default: 'localhost', :wukong_hadoop => true)
31
+ settings.define(:es_transport_port, :description => "Port of existing node for transport client", default: 9300, type: Integer, :wukong_hadoop => true)
29
32
  end
30
33
 
31
34
  # Boot Wonderdog with the given `settings` in the given `dir`.
@@ -105,7 +105,10 @@ module Wukong
105
105
 
106
106
  super() + [].tap do |o|
107
107
  if (reads_from_elasticsearch? || writes_to_elasticsearch?)
108
- o << java_opt('es.config', settings[:es_config])
108
+ o << java_opt('es.config', settings[:es_config])
109
+ o << java_opt('elasticsearch.transport', settings[:es_transport])
110
+ o << java_opt('elasticsearch.transport.host', settings[:es_transport_host]) if settings[:es_transport] && settings[:es_transport_host]
111
+ o << java_opt('elasticsearch.transport.port', settings[:es_transport_port]) if settings[:es_transport] && settings[:es_transport_port]
109
112
  end
110
113
 
111
114
  if reads_from_elasticsearch?
@@ -1,3 +1,3 @@
1
1
  module Wonderdog
2
- VERSION = '0.1.1'
2
+ VERSION = '0.2.0'
3
3
  end
data/pom.xml CHANGED
@@ -18,7 +18,7 @@
18
18
  <dependency>
19
19
  <groupId>org.elasticsearch</groupId>
20
20
  <artifactId>elasticsearch</artifactId>
21
- <version>0.19.8</version>
21
+ <version>0.90.5</version>
22
22
  </dependency>
23
23
 
24
24
  <dependency>
@@ -21,7 +21,7 @@ RSpec.configure do |config|
21
21
 
22
22
  def hadoop_runner *args, &block
23
23
  runner(Wukong::Hadoop::HadoopRunner, 'wu-hadoop', *args) do
24
- stub!(:execute_command!)
24
+ stub(:execute_command!)
25
25
  instance_eval(&block) if block_given?
26
26
  end
27
27
  end
@@ -8,7 +8,7 @@ describe Wukong::Elasticsearch::HadoopInvocationOverride do
8
8
  let(:es_complex) { hadoop_runner('regexp', 'count', input: 'es://the_index/the_map', output: 'es:///the_index/the_map', es_query: '{"hi": "there"}', es_request_size: 1000, es_index_field: 'ID', map_speculative: true, reduce_speculative: true) }
9
9
 
10
10
  context "passing necessary jars to Hadoop streaming" do
11
- before { Dir.stub!(:[]).and_return(["/lib/dir/elasticsearch.jar"], ["/lib/dir/wonderdog.jar"]) }
11
+ before { Dir.stub(:[]).and_return(["/lib/dir/elasticsearch.jar"], ["/lib/dir/wonderdog.jar"]) }
12
12
  context "when not given explicit jars" do
13
13
  context "and not interacting with Elasticsearch" do
14
14
  it "doesn't add jars" do
@@ -0,0 +1,95 @@
1
+ require 'configliere'
2
+ require 'json'
3
+
4
+ Settings.use :commandline
5
+ Settings.use :config_block
6
+ Settings.define :es_index, default: nil
7
+ Settings.define :fields, default: nil
8
+ Settings.define :one_index, default: false, flag: 'o'
9
+ Settings.define :composite_key, default: nil
10
+ Settings.define :es_type
11
+ Settings.finally do |settings|
12
+ settings.es_index ||= settings.one_index ? [settings.es_type, "cnt"].join : 'ad_activity'
13
+ end
14
+ Settings.resolve!
15
+
16
+ puts "using es index #{Settings.es_index} and type #{Settings.es_type}"
17
+
18
+ index = Settings.es_index
19
+ type = Settings.es_type
20
+
21
+ response = `curl localhost:9200/#{Settings.es_index}/_mapping/`
22
+ puts JSON.parse(response)["#{index}"]["#{type}"]#["properties"]
23
+ fields = JSON.parse(response)["#{index}"]["#{type}"]["properties"].keys#.select{|x| x.end_with?("_id")}
24
+ #fields += %w[metric feature]# browser_ua]
25
+ #fields = JSON.parse(response)["#{type}_legacy"][type]["properties"].keys.select{|x| x.end_with?("_id")}
26
+
27
+ puts "got fields #{Settings.fields || fields}"
28
+
29
+ # site_count => site_composite
30
+ # placement_count => pl_composite
31
+ # flight_count => metric_feature
32
+
33
+ composite_key = case Settings.es_type
34
+ when "site_count" then "site_composite"
35
+ when "placement_count" then "pl_composite"
36
+ when "flight_count" then "metric_feature"
37
+ else nil
38
+ end
39
+
40
+ doc = {
41
+ "query"=> {
42
+ "match_all"=> {}
43
+ },
44
+ "facets"=> {
45
+ # "ignz"=> {
46
+ # "statistical"=> {
47
+ # "field" => "cnt"
48
+ # }
49
+ # },
50
+ "igna"=> {
51
+ "date_histogram"=> {
52
+ "field"=> "tb_h",
53
+ "interval" => "day"
54
+ }
55
+ },
56
+ "ignb"=> {
57
+ "terms"=> {
58
+ "fields"=> [Settings.fields || fields].flatten
59
+ }
60
+ },
61
+ }
62
+ }
63
+
64
+ fields.each do |field|
65
+ doc["facets"]["ign_#{field}"] = {
66
+ "terms_stats" => {
67
+ "key_field" => field,
68
+ "value_field" => "cnt"
69
+ }
70
+ }
71
+ end
72
+
73
+ if not composite_key.nil?
74
+ doc["facets"]["ignc"] = {
75
+ "terms_stats" => {
76
+ "key_field" => Settings.composite_key || composite_key,
77
+ "value_field" => "cnt"
78
+ }
79
+ }
80
+ end
81
+
82
+ puts "about to query with " + doc.to_s
83
+
84
+ results = `curl localhost:9200/#{Settings.es_index}/#{Settings.es_type}/_search/?pretty=true -d '#{JSON.generate(doc)}'`
85
+ term_hash = {}
86
+ resultsLineArray = results.split( /\r?\n/ )
87
+ copy_resultsLineArray = resultsLineArray - []
88
+ copy_resultsLineArray.each_with_index do |line, index|
89
+ if line.include?("\"term\" : \"")
90
+ term_hash[line] = index
91
+ end
92
+ end
93
+ puts `curl localhost:9200/#{Settings.es_index}/#{Settings.es_type}/_search/?pretty=true -d '#{JSON.generate(doc)}'`
94
+ puts "from command: \n curl localhost:9200/#{Settings.es_index}/#{Settings.es_type}/_search/ -d '#{JSON.generate(doc)}'"
95
+ puts "number of terms: #{term_hash.keys.size()}"
@@ -0,0 +1,19 @@
1
+ class ChangeESIndexSettings
2
+ def initialize(options = {})
3
+ @host = options[:host]
4
+ @port = options[:port]
5
+ @index = options[:index]
6
+ @settings_and_values = options[:settings_and_values]
7
+ end
8
+
9
+ def change_setting(setting, value)
10
+ puts "changing setting #{setting} to value #{value}"
11
+ `curl -s -XPUT 'http://#{@host}:#{@port}/#{@index}/_settings?pretty=true' -d '{ "#{setting}":"#{value}" }'`
12
+ end
13
+
14
+ def run
15
+ @settings_and_values.each do |setting, value|
16
+ change_setting(setting, value)
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,30 @@
1
+ class ClearESCaches
2
+ def initialize(options={})
3
+ @to_clear = options[:type]
4
+ @host = options[:host]
5
+ @port = options[:port]
6
+ end
7
+
8
+ def clear_all
9
+ `curl -s -XPOST 'http://#{@host}:#{@port}/_all/_cache/clear?field_data=true&filter=true&bloom=true' ; echo`
10
+ end
11
+
12
+ def clear_filter_cache
13
+ `curl -s -XPOST 'http://#{@host}:#{@port}/_all/_cache/clear?field_data=false&filter=true&bloom=true' ; echo`
14
+ end
15
+
16
+ def clear_fielddata
17
+ `curl -s -XPOST 'http://#{@host}:#{@port}/_all/_cache/clear?field_data=true&filter=false&bloom=true' ; echo`
18
+ end
19
+
20
+ def run
21
+ puts @to_clear
22
+ case command = @to_clear.to_sym
23
+ when :all then clear_all
24
+ when :filter then clear_filter_cache
25
+ when :fielddata then clear_fielddata
26
+ else abort "#{command} not recognized"
27
+ end
28
+ end
29
+
30
+ end