wonderdog 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/.idea/encodings.xml +5 -0
- data/.idea/misc.xml +5 -0
- data/.idea/modules.xml +9 -0
- data/.idea/scopes/scope_settings.xml +5 -0
- data/.idea/vcs.xml +7 -0
- data/.idea/wonderdog.iml +41 -0
- data/Gemfile +1 -1
- data/bin/estool +22 -1
- data/bin/squirrel.rb +108 -0
- data/lib/wonderdog.rb +3 -0
- data/lib/wonderdog/hadoop_invocation_override.rb +4 -1
- data/lib/wonderdog/version.rb +1 -1
- data/pom.xml +1 -1
- data/spec/spec_helper.rb +1 -1
- data/spec/wonderdog/hadoop_invocation_override_spec.rb +1 -1
- data/squirrel/all_facets.rb +95 -0
- data/squirrel/change_es_index_settings.rb +19 -0
- data/squirrel/clear_es_caches.rb +30 -0
- data/squirrel/esbackup.rb +184 -0
- data/squirrel/esbackup_stripped.rb +153 -0
- data/squirrel/fields.sh +5 -0
- data/squirrel/getFields.rb +19 -0
- data/squirrel/replay.rb +219 -0
- data/squirrel/squirrel.rb +95 -0
- data/squirrel/warmer_interface.rb +59 -0
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchInputFormat.java +2 -2
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingInputFormat.java +14 -2
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingOutputFormat.java +20 -5
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingRecordReader.java +55 -26
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingRecordWriter.java +59 -22
- data/test/cardinality.rb +43 -0
- data/test/change_es_index_settings.rb +19 -0
- data/test/clear_es_caches.rb +30 -0
- data/test/config/mapping.yml +327 -0
- data/test/config/mappings.yml +328 -0
- data/test/count_check.txt +0 -0
- data/test/esbackup_stripped.rb +153 -0
- data/test/mapping.yml +327 -0
- data/test/medium_slow_queries +41 -0
- data/test/queries.txt +0 -0
- data/test/quick_test_slow_queries +4 -0
- data/test/run_pry.rb +3 -0
- data/test/some_slow_queries +53 -0
- data/test/warmer_interface.rb +64 -0
- data/test/warmindices.rb +65 -0
- data/wonderdog.gemspec +1 -1
- metadata +40 -7
data/.gitignore
CHANGED
data/.idea/encodings.xml
ADDED
data/.idea/misc.xml
ADDED
data/.idea/modules.xml
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<project version="4">
|
3
|
+
<component name="ProjectModuleManager">
|
4
|
+
<modules>
|
5
|
+
<module fileurl="file://$PROJECT_DIR$/.idea/wonderdog.iml" filepath="$PROJECT_DIR$/.idea/wonderdog.iml" />
|
6
|
+
</modules>
|
7
|
+
</component>
|
8
|
+
</project>
|
9
|
+
|
data/.idea/vcs.xml
ADDED
data/.idea/wonderdog.iml
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module type="RUBY_MODULE" version="4">
|
3
|
+
<component name="FacetManager">
|
4
|
+
<facet type="gem" name="Gem">
|
5
|
+
<configuration>
|
6
|
+
<option name="GEM_APP_ROOT_PATH" value="$MODULE_DIR$" />
|
7
|
+
<option name="GEM_APP_TEST_PATH" value="$MODULE_DIR$/test" />
|
8
|
+
<option name="GEM_APP_LIB_PATH" value="$MODULE_DIR$/lib" />
|
9
|
+
</configuration>
|
10
|
+
</facet>
|
11
|
+
</component>
|
12
|
+
<component name="NewModuleRootManager">
|
13
|
+
<content url="file://$MODULE_DIR$">
|
14
|
+
<sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" />
|
15
|
+
</content>
|
16
|
+
<orderEntry type="inheritedJdk" />
|
17
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
18
|
+
<orderEntry type="library" scope="PROVIDED" name="bundler (v1.3.5, rbenv: 1.9.3-p392) [gem]" level="application" />
|
19
|
+
<orderEntry type="library" scope="PROVIDED" name="configliere (v0.4.18, rbenv: 1.9.3-p392) [gem]" level="application" />
|
20
|
+
<orderEntry type="library" scope="PROVIDED" name="diff-lcs (v1.2.4, rbenv: 1.9.3-p392) [gem]" level="application" />
|
21
|
+
<orderEntry type="library" scope="PROVIDED" name="eventmachine (v1.0.3, rbenv: 1.9.3-p392) [gem]" level="application" />
|
22
|
+
<orderEntry type="library" scope="PROVIDED" name="forgery (v0.5.0, rbenv: 1.9.3-p392) [gem]" level="application" />
|
23
|
+
<orderEntry type="library" scope="PROVIDED" name="gorillib (v0.5.0, rbenv: 1.9.3-p392) [gem]" level="application" />
|
24
|
+
<orderEntry type="library" scope="PROVIDED" name="highline (v1.6.18, rbenv: 1.9.3-p392) [gem]" level="application" />
|
25
|
+
<orderEntry type="library" scope="PROVIDED" name="json (v1.7.7, rbenv: 1.9.3-p392) [gem]" level="application" />
|
26
|
+
<orderEntry type="library" scope="PROVIDED" name="log4r (v1.1.10, rbenv: 1.9.3-p392) [gem]" level="application" />
|
27
|
+
<orderEntry type="library" scope="PROVIDED" name="multi_json (v1.7.2, rbenv: 1.9.3-p392) [gem]" level="application" />
|
28
|
+
<orderEntry type="library" scope="PROVIDED" name="rake (v0.9.6, rbenv: 1.9.3-p392) [gem]" level="application" />
|
29
|
+
<orderEntry type="library" scope="PROVIDED" name="redcarpet (v2.2.2, rbenv: 1.9.3-p392) [gem]" level="application" />
|
30
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec (v2.13.0, rbenv: 1.9.3-p392) [gem]" level="application" />
|
31
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-core (v2.13.1, rbenv: 1.9.3-p392) [gem]" level="application" />
|
32
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v2.13.0, rbenv: 1.9.3-p392) [gem]" level="application" />
|
33
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v2.13.1, rbenv: 1.9.3-p392) [gem]" level="application" />
|
34
|
+
<orderEntry type="library" scope="PROVIDED" name="uuidtools (v2.1.3, rbenv: 1.9.3-p392) [gem]" level="application" />
|
35
|
+
<orderEntry type="library" scope="PROVIDED" name="vayacondios-client (v0.1.12, rbenv: 1.9.3-p392) [gem]" level="application" />
|
36
|
+
<orderEntry type="library" scope="PROVIDED" name="wukong (v3.0.1, rbenv: 1.9.3-p392) [gem]" level="application" />
|
37
|
+
<orderEntry type="library" scope="PROVIDED" name="wukong-hadoop (v0.1.1, rbenv: 1.9.3-p392) [gem]" level="application" />
|
38
|
+
<orderEntry type="library" scope="PROVIDED" name="yard (v0.8.6.1, rbenv: 1.9.3-p392) [gem]" level="application" />
|
39
|
+
</component>
|
40
|
+
</module>
|
41
|
+
|
data/Gemfile
CHANGED
data/bin/estool
CHANGED
@@ -23,6 +23,9 @@ Commands include:
|
|
23
23
|
optimize Optimizes the specified INDEX to (-s) number of segments
|
24
24
|
snapshot Snapshots the specified INDEX to the gateway
|
25
25
|
segments Returns the segment information. Requires ElasticSearch v
|
26
|
+
aliases Returns a list of Index/Alias pairs
|
27
|
+
ialiases Returns a list of Alias/Index pairs
|
28
|
+
count The number of documents in an index
|
26
29
|
mapping
|
27
30
|
set_replication
|
28
31
|
search
|
@@ -89,7 +92,7 @@ class ESTool
|
|
89
92
|
|
90
93
|
# Returns the shell_response result for "<index>/_status?", where <index>
# is taken from options.index.
def status
  status_path = File.join(options.index, "_status?")
  shell_response(status_path)
end
|
91
94
|
|
92
|
-
def list() status["indices"].keys ; end
|
95
|
+
# Returns the index names found under status["indices"], sorted.
def list
  index_names = status["indices"].keys
  index_names.sort
end
|
93
96
|
|
94
97
|
# Returns the shell_response result for the "_cluster/health?" path.
def health
  shell_response("_cluster/health?")
end
|
95
98
|
|
@@ -111,6 +114,24 @@ class ESTool
|
|
111
114
|
# Returns the shell_response result for "<index>/_segments", where <index>
# is taken from options.index.
def segments
  segments_path = File.join(options.index, "_segments")
  shell_response(segments_path)
end
|
112
115
|
|
113
116
|
# Returns the shell_response result for "<index>/_mapping", where <index>
# is taken from options.index.
def mapping
  mapping_path = File.join(options.index, "_mapping")
  shell_response(mapping_path)
end
|
117
|
+
|
118
|
+
# Builds a hash of index name => array of alias names from the '_aliases?'
# response. Entries are inserted in sorted index order.
def aliases
  shell_response('_aliases?').sort.each_with_object({}) do |(index, info), by_index|
    by_index[index] = info['aliases'].keys
  end
end
|
119
|
+
|
120
|
+
# Inverts the result of `aliases`: returns a hash of alias name => array of
# index names carrying that alias. The returned hash keeps its default block,
# so looking up an unknown alias stores and returns an empty array.
def ialiases
  by_alias = Hash.new { |hsh, key| hsh[key] = [] }
  aliases.each_with_object(by_alias) do |(idx, als), inverse|
    als.each { |a| inverse[a] << idx }
  end
end
|
125
|
+
|
126
|
+
# estool status -r | ruby -rjson -e 'puts JSON.parse($stdin.read)["indices"]["item_count_legacy"]["docs"]'
|
127
|
+
# {"num_docs"=>187749, "max_doc"=>187749, "deleted_docs"=>0}
|
128
|
+
# Returns a hash of index name => document count.
#
# For the '_all' index the counts are read from each index's
# ['docs']['num_docs'] entry in `status`; for any other index the 'count'
# field of the "<index>/_count" response is used.
def count
  unless options.index == '_all'
    return { options.index => shell_response(File.join(options.index, '_count'))['count'] }
  end

  status['indices'].each_with_object({}) do |(index, info), totals|
    totals[index] = info['docs']['num_docs']
  end
end
|
114
135
|
|
115
136
|
# curl -s -XPUT http://host:port/index/_settings -d '{"index":{"number_of_replicas":num}}'
|
116
137
|
# Placeholder: always returns an error hash saying the command is not
# implemented yet.
def set_replication
  { "error" => "method not yet implemented" }
end
|
data/bin/squirrel.rb
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
#! /usr/bin/ruby
|
2
|
+
|
3
|
+
require "configliere"
|
4
|
+
require_relative "../squirrel/squirrel.rb"
|
5
|
+
|
6
|
+
# Usage text for the squirrel command line; assigned to Settings.description
# further down in this script.
doc = <<DOC
This is the uber script: the arguments you give it decide what happens.
squirrel => Standard Query Ultracrepidate Iamatology Ruby Resource for Elasticsearch Labarum ##
example commands:
clear all caches
ruby squirrel.rb cache --host=localhost --port=9200 --clear_all_cache=true
run slow log queries
ruby squirrel.rb replay --host=localhost --port=9200 --preference=true,_primary_first --execute_slow_queries=/var/log/elasticsearch/padraig.log
back up an index, aka generate a dumpfile
ruby squirrel.rb backup --host=localhost --port=9200 --output_dir="." --dump_index=flight_count_20130405 --batch_size=100 --dump_mapping=flight_count_20130405_mapping.json
get the cardinality of a dumpfile(card_file)
ruby squirrel.rb cardinality --host=localhost --port=9200 --output_dir="." --card_file=flight_count_20130405 --cardinality=cnt,metric
restore an index from a dumpfile
ruby squirrel.rb restore --host=localhost --port=9200 --output_dir="." --restore_file=flight_count_20130405.gz --restore_index=flight_count_20130405 --restore_mapping=flight_count_20130405_mapping.json --batch_size=100
duplicate files in an index from a dumpfile(duplicate_file)
ruby squirrel.rb duplicate --host=localhost --port=9200 --output_dir="." --duplicate_file=flight_count_20130405.gz --duplicate_index=eight_flight_count_20130405 --duplicate_mapping=flight_count_20130405_mapping.json --batch_size=100
add warmer
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --new_warmers_name=polite_warmer --warmers_index=flight_count_20130408 --create_warmer='{"sort" : ["_state", "flight_id","metric", "tb_h", "feature", "seconds", "base_feature", "metric_feature", "cnt", "_score"],"query":{"match_all":{}}}'
remove warmer
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --remove_warmer=polite_warmer --warmers_index=flight_count_20130408
disable warmers
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --warmers=false --warmers_index=flight_count_20130405
enable warmers
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --warmers=true --warmers_index=flight_count_20130405
remove warmer
ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --remove_warmer=polite_warmer --warmers_index=flight_count_20130405
change index settings
ruby squirrel.rb index_settings --host=localhost --port=9200 --output_dir="." --settings_index=flight_count_20130405 --es_index_settings=refresh_interval,refresh_interval --es_index_settings_values=-1,0
DOC
|
35
|
+
|
36
|
+
Settings.use :commandline
|
37
|
+
Settings.use :commands
|
38
|
+
Settings.description = doc
|
39
|
+
# `backup` command: every option defaults to nil.
Settings.define_command :backup, description: "Create a dump gzip file of an index" do |backup|
  backup.define :output_dir,   default: nil, description: 'Directory to put output, defaults to nil'
  backup.define :dump_file,    default: nil, description: 'The name of the dumpfile to use, default is nil'
  backup.define :dump_index,   default: nil, description: 'Index to use, default is nil'
  backup.define :query,        default: nil, description: 'Query to use in order to limit the data extracted from the index, default nil'
  backup.define :host,         default: nil, description: 'The host to connect to, defaults to nil'
  backup.define :port,         type: Integer, default: nil, description: 'The port to connect to on the host, defaults to nil'
  backup.define :dump_mapping, default: nil, description: 'The file to put the json mapping in, defaults to nil'
  backup.define :batch_size,   type: Integer, default: nil, description: 'The number of lines to process at once, defaults to nil'
end
|
49
|
+
# `restore` command: every option defaults to nil.
Settings.define_command :restore, description: "Take the data from a dump gzip file and use it to populate an index" do |restore|
  restore.define :output_dir,      default: nil, description: 'Directory to put output, defaults to nil'
  restore.define :restore_file,    default: nil, description: 'The name of the dumpfile to use, default is nil'
  restore.define :restore_index,   default: nil, description: 'Index to use, default is nil'
  restore.define :host,            default: nil, description: 'The host to connect to, defaults to nil'
  restore.define :port,            type: Integer, default: nil, description: 'The port to connect to on the host, defaults to nil'
  restore.define :restore_mapping, default: nil, description: 'The mapping file to use when restoring an index, defaults to nil'
  restore.define :batch_size,      type: Integer, default: nil, description: 'The number of lines to process at once, defaults to nil'
end
|
58
|
+
# `duplicate` command: every option defaults to nil.
# NOTE(review): the usage example in this script passes --duplicate_mapping,
# while the option defined here is :duplicated_mapping — confirm which name
# the Squirrel class actually reads.
Settings.define_command :duplicate, description: "Take the data from a dump gzip file and add it into an index allowing documents to be duplicated" do |duplicate|
  duplicate.define :output_dir,         default: nil, description: 'Directory to put output, defaults to nil'
  duplicate.define :duplicate_file,     default: nil, description: 'The name of the dumpfile to use, default is nil'
  duplicate.define :duplicate_index,    default: nil, description: 'Index to use, default is nil'
  duplicate.define :host,               default: nil, description: 'The host to connect to, defaults to nil'
  duplicate.define :port,               type: Integer, default: nil, description: 'The port to connect to on the host, defaults to nil'
  duplicate.define :duplicated_mapping, default: nil, description: 'The mapping file to use when restoring an index, defaults to nil'
  duplicate.define :batch_size,         type: Integer, default: nil, description: 'The number of lines to process at once, defaults to nil'
end
|
67
|
+
# `cardinality` command: every option defaults to nil.
# Fixes the "valuse" typo in the command description shown to users.
Settings.define_command :cardinality, description: "Count the number of unique values for the given field(s)" do |cardinality|
  cardinality.define :output_dir,  default: nil, description: 'Directory to put output, defaults to nil'
  cardinality.define :cardinality, type: Array, default: nil, description: 'Return the cardinality of the given fields, defaults to nil'
  cardinality.define :card_file,   default: nil, description: 'The dump file to grab info from when determining cardinality MUST NOT be compressed, defaults to nil'
end
|
72
|
+
# `warmer` command: every option defaults to nil.
# Help strings corrected: "Interact with ..." and "add the warmer to".
Settings.define_command :warmer, description: "Interact with elasticsearch warmers" do |warmer|
  warmer.define :warmers,          default: nil, description: 'Use warmers expected values true/false, defaults to nil'
  warmer.define :warmers_index,    default: nil, description: 'The index to add the warmer to, remove it from or disable/enable it on, defaults to nil'
  warmer.define :new_warmers_name, default: nil, description: 'Name of warmer to create, defaults to nil'
  warmer.define :create_warmer,    default: nil, description: 'Query to create warmer, defaults to nil'
  warmer.define :remove_warmer,    default: nil, description: 'Name of warmer to remove, defaults to nil'
  warmer.define :host,             default: nil, description: 'The host to connect to, defaults to nil'
  warmer.define :port,             type: Integer, default: nil, description: 'The port to connect to on the host, defaults to nil'
end
|
81
|
+
# `cache` command: every option defaults to nil.
# The :clear_fielddata help text previously repeated the filter-cache
# description; it now names the fielddata cache.
Settings.define_command :cache, description: "Interact with elasticsearch caches" do |cache|
  cache.define :host,               default: nil, description: 'The host to connect to, defaults to nil'
  cache.define :port,               type: Integer, default: nil, description: 'The port to connect to on the host, defaults to nil'
  cache.define :clear_all_cache,    type: :boolean, default: nil, description: 'Clear all caches expected true/false, defaults to nil'
  cache.define :clear_fielddata,    type: :boolean, default: nil, description: 'Clear fielddata cache expected true/false, defaults to nil'
  cache.define :clear_filter_cache, type: :boolean, default: nil, description: 'Clear filter cache expected true/false, defaults to nil'
end
|
88
|
+
# `replay` command. :preference and :routing are arrays of [enabled, value];
# the other options default to nil.
# The :execute_slow_queries help text previously cited a flag named
# --execute_slow_log, which is not defined; it now cites the real flag.
Settings.define_command :replay, description: "Replay slow log queries" do |replay|
  replay.define :host,                 default: nil, description: 'The host to connect to, defaults to nil'
  replay.define :port,                 type: Integer, default: nil, description: 'The port to connect to on the host, defaults to nil'
  replay.define :execute_slow_queries, default: nil, description: 'Execute the slow log queries in the provided log file,ie --execute_slow_queries=/var/log/elasticsearch/padraig.log, defaults to nil'
  replay.define :batch_size,           type: Integer, default: nil, description: 'The number of lines to process at once, defaults to nil'
  replay.define :preference,           type: Array, default: [true, "_primary_first"], description: 'Turn on/off preference and set it,ie --preference=true,_primary_first'
  replay.define :routing,              type: Array, default: [false, ""], description: 'Add routing, ie --routing=true,flight_id'
end
|
96
|
+
# `index_settings` command: every option defaults to nil.
# Help strings corrected: they claimed a default of [] although the default
# is nil, misspelled "delimited", and referred to the option as
# index_settings instead of es_index_settings.
Settings.define_command :index_settings, description: "Change the index settings" do |index_settings|
  index_settings.define :host,                     default: nil, description: 'The host to connect to, defaults to nil'
  index_settings.define :port,                     type: Integer, default: nil, description: 'The port to connect to on the host, defaults to nil'
  index_settings.define :settings_index,           default: nil, description: 'The index that the settings listed in es_index_settings will be changed for, defaults to nil'
  index_settings.define :es_index_settings,        type: Array, default: nil, description: 'A comma delimited list of elasticsearch index settings to be set for --settings_index, defaults to nil'
  index_settings.define :es_index_settings_values, type: Array, default: nil, description: 'A comma delimited list of elasticsearch index settings values to be set for --settings_index, defaults to nil'
end
|
103
|
+
Settings.resolve!
|
104
|
+
|
105
|
+
|
106
|
+
Squirrel.new(Settings.command_name, Settings.to_hash).task_caller
|
107
|
+
|
108
|
+
|
data/lib/wonderdog.rb
CHANGED
@@ -26,6 +26,9 @@ module Wukong
|
|
26
26
|
settings.define(:es_id_field, :description => "If this field is present in a record, make an update request, otherwise make a create request", :wukong_hadoop => true)
|
27
27
|
settings.define(:es_bulk_size, :description => "Number of requests to batch locally before making a request to ElasticSearch", :type => Integer, :wukong_hadoop => true)
|
28
28
|
settings.define(:es_query, :description => "Query to use when defining input splits for ElasticSearch input", :wukong_hadoop => true)
|
29
|
+
settings.define(:es_transport, :description => "Use a transport client to an existing node instead of spinning up a new node", :default => true, type: :boolean, :wukong_hadoop => true)
|
30
|
+
settings.define(:es_transport_host, :description => "Host of existing node for transport client", default: 'localhost', :wukong_hadoop => true)
|
31
|
+
settings.define(:es_transport_port, :description => "Port of existing node for transport client", default: 9300, type: Integer, :wukong_hadoop => true)
|
29
32
|
end
|
30
33
|
|
31
34
|
# Boot Wonderdog with the given `settings` in the given `dir`.
|
@@ -105,7 +105,10 @@ module Wukong
|
|
105
105
|
|
106
106
|
super() + [].tap do |o|
|
107
107
|
if (reads_from_elasticsearch? || writes_to_elasticsearch?)
|
108
|
-
o << java_opt('es.config',
|
108
|
+
o << java_opt('es.config', settings[:es_config])
|
109
|
+
o << java_opt('elasticsearch.transport', settings[:es_transport])
|
110
|
+
o << java_opt('elasticsearch.transport.host', settings[:es_transport_host]) if settings[:es_transport] && settings[:es_transport_host]
|
111
|
+
o << java_opt('elasticsearch.transport.port', settings[:es_transport_port]) if settings[:es_transport] && settings[:es_transport_port]
|
109
112
|
end
|
110
113
|
|
111
114
|
if reads_from_elasticsearch?
|
data/lib/wonderdog/version.rb
CHANGED
data/pom.xml
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -8,7 +8,7 @@ describe Wukong::Elasticsearch::HadoopInvocationOverride do
|
|
8
8
|
let(:es_complex) { hadoop_runner('regexp', 'count', input: 'es://the_index/the_map', output: 'es:///the_index/the_map', es_query: '{"hi": "there"}', es_request_size: 1000, es_index_field: 'ID', map_speculative: true, reduce_speculative: true) }
|
9
9
|
|
10
10
|
context "passing necessary jars to Hadoop streaming" do
|
11
|
-
before { Dir.stub
|
11
|
+
before { Dir.stub(:[]).and_return(["/lib/dir/elasticsearch.jar"], ["/lib/dir/wonderdog.jar"]) }
|
12
12
|
context "when not given explicit jars" do
|
13
13
|
context "and not interacting with Elasticsearch" do
|
14
14
|
it "doesn't add jars" do
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'configliere'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
# Builds one facet query covering the fields of an index/type mapping, runs it
# against the Elasticsearch node on localhost:9200 via curl, prints the
# response, and reports how many distinct "term" lines it contains.
Settings.use :commandline
Settings.use :config_block
Settings.define :es_index, default: nil
Settings.define :fields, default: nil
Settings.define :one_index, default: false, flag: 'o'
Settings.define :composite_key, default: nil
Settings.define :es_type
Settings.finally do |settings|
  # Without an explicit index: "<es_type>cnt" when --one_index is set,
  # otherwise 'ad_activity'.
  settings.es_index ||= settings.one_index ? [settings.es_type, "cnt"].join : 'ad_activity'
end
Settings.resolve!

puts "using es index #{Settings.es_index} and type #{Settings.es_type}"

index = Settings.es_index
type = Settings.es_type

# Fetch the mapping and parse it once; the field list is the key set of the
# type's "properties".
response = `curl localhost:9200/#{Settings.es_index}/_mapping/`
type_mapping = JSON.parse(response)[index.to_s][type.to_s]
puts type_mapping
fields = type_mapping["properties"].keys

puts "got fields #{Settings.fields || fields}"

# site_count => site_composite
# placement_count => pl_composite
# flight_count => metric_feature
composite_key = case Settings.es_type
                when "site_count" then "site_composite"
                when "placement_count" then "pl_composite"
                when "flight_count" then "metric_feature"
                end

doc = {
  "query" => {
    "match_all" => {}
  },
  "facets" => {
    "igna" => {
      "date_histogram" => {
        "field" => "tb_h",
        "interval" => "day"
      }
    },
    "ignb" => {
      "terms" => {
        "fields" => [Settings.fields || fields].flatten
      }
    }
  }
}

# One terms_stats facet per mapped field, valued by "cnt".
fields.each do |field|
  doc["facets"]["ign_#{field}"] = {
    "terms_stats" => {
      "key_field" => field,
      "value_field" => "cnt"
    }
  }
end

# The composite facet is only added for the three known types; an explicit
# --composite_key overrides the key field used for them.
unless composite_key.nil?
  doc["facets"]["ignc"] = {
    "terms_stats" => {
      "key_field" => Settings.composite_key || composite_key,
      "value_field" => "cnt"
    }
  }
end

puts "about to query with " + doc.to_s

# Run the search once and reuse the response for both counting and printing,
# so the printed output is the response that was counted.
results = `curl localhost:9200/#{Settings.es_index}/#{Settings.es_type}/_search/?pretty=true -d '#{JSON.generate(doc)}'`
term_lines = results.split(/\r?\n/).select { |line| line.include?("\"term\" : \"") }

puts results
puts "from command: \n curl localhost:9200/#{Settings.es_index}/#{Settings.es_type}/_search/ -d '#{JSON.generate(doc)}'"
puts "number of terms: #{term_lines.uniq.size}"
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'json'
require 'shellwords'

# Changes settings on an Elasticsearch index by shelling out to curl.
class ChangeESIndexSettings
  # @param options [Hash] reads :host, :port, :index and :settings_and_values
  #   (an enumerable of setting/value pairs, e.g. a Hash)
  def initialize(options = {})
    @host = options[:host]
    @port = options[:port]
    @index = options[:index]
    @settings_and_values = options[:settings_and_values]
  end

  # PUTs a single setting to the index's _settings endpoint.
  #
  # The JSON body is produced by JSON.generate and both the URL and the body
  # are shell-escaped, so quotes or other shell metacharacters in a setting or
  # value can neither corrupt the JSON nor break out of the curl command.
  # As before, the value is always sent as a JSON string.
  #
  # @param setting [#to_s] name of the setting
  # @param value [#to_s] value of the setting
  # @return [String] output of the curl command
  def change_setting(setting, value)
    puts "changing setting #{setting} to value #{value}"
    url = "http://#{@host}:#{@port}/#{@index}/_settings?pretty=true"
    payload = JSON.generate(setting.to_s => value.to_s)
    `curl -s -XPUT #{Shellwords.escape(url)} -d #{Shellwords.escape(payload)}`
  end

  # Applies every configured setting/value pair; a no-op when none were given.
  def run
    (@settings_and_values || {}).each do |setting, value|
      change_setting(setting, value)
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Clears Elasticsearch caches on all indices by shelling out to curl.
class ClearESCaches
  # @param options [Hash] reads :type (all / filter / fielddata), :host, :port
  def initialize(options = {})
    @to_clear = options[:type]
    @host = options[:host]
    @port = options[:port]
  end

  # Requests a clear with field_data=true and filter=true.
  def clear_all
    clear_cache(true, true)
  end

  # Requests a clear with field_data=false and filter=true.
  def clear_filter_cache
    clear_cache(false, true)
  end

  # Requests a clear with field_data=true and filter=false.
  def clear_fielddata
    clear_cache(true, false)
  end

  # Prints the requested type, then dispatches on it. Aborts with a
  # "not recognized" message for an unknown type; a missing (nil) type now
  # takes the same abort path instead of raising NoMethodError.
  def run
    puts @to_clear
    case command = @to_clear.to_s.to_sym
    when :all then clear_all
    when :filter then clear_filter_cache
    when :fielddata then clear_fielddata
    else abort "#{command} not recognized"
    end
  end

  private

  # Issues the _cache/clear POST against _all with the given flags; bloom is
  # always true. Returns the command output.
  def clear_cache(field_data, filter)
    `curl -s -XPOST 'http://#{@host}:#{@port}/_all/_cache/clear?field_data=#{field_data}&filter=#{filter}&bloom=true' ; echo`
  end
end
|