wonderdog 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/.idea/encodings.xml +5 -0
- data/.idea/misc.xml +5 -0
- data/.idea/modules.xml +9 -0
- data/.idea/scopes/scope_settings.xml +5 -0
- data/.idea/vcs.xml +7 -0
- data/.idea/wonderdog.iml +41 -0
- data/Gemfile +1 -1
- data/bin/estool +22 -1
- data/bin/squirrel.rb +108 -0
- data/lib/wonderdog.rb +3 -0
- data/lib/wonderdog/hadoop_invocation_override.rb +4 -1
- data/lib/wonderdog/version.rb +1 -1
- data/pom.xml +1 -1
- data/spec/spec_helper.rb +1 -1
- data/spec/wonderdog/hadoop_invocation_override_spec.rb +1 -1
- data/squirrel/all_facets.rb +95 -0
- data/squirrel/change_es_index_settings.rb +19 -0
- data/squirrel/clear_es_caches.rb +30 -0
- data/squirrel/esbackup.rb +184 -0
- data/squirrel/esbackup_stripped.rb +153 -0
- data/squirrel/fields.sh +5 -0
- data/squirrel/getFields.rb +19 -0
- data/squirrel/replay.rb +219 -0
- data/squirrel/squirrel.rb +95 -0
- data/squirrel/warmer_interface.rb +59 -0
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchInputFormat.java +2 -2
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingInputFormat.java +14 -2
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingOutputFormat.java +20 -5
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingRecordReader.java +55 -26
- data/src/main/java/com/infochimps/elasticsearch/ElasticSearchStreamingRecordWriter.java +59 -22
- data/test/cardinality.rb +43 -0
- data/test/change_es_index_settings.rb +19 -0
- data/test/clear_es_caches.rb +30 -0
- data/test/config/mapping.yml +327 -0
- data/test/config/mappings.yml +328 -0
- data/test/count_check.txt +0 -0
- data/test/esbackup_stripped.rb +153 -0
- data/test/mapping.yml +327 -0
- data/test/medium_slow_queries +41 -0
- data/test/queries.txt +0 -0
- data/test/quick_test_slow_queries +4 -0
- data/test/run_pry.rb +3 -0
- data/test/some_slow_queries +53 -0
- data/test/warmer_interface.rb +64 -0
- data/test/warmindices.rb +65 -0
- data/wonderdog.gemspec +1 -1
- metadata +40 -7
data/.gitignore
CHANGED
data/.idea/encodings.xml
ADDED
data/.idea/misc.xml
ADDED
data/.idea/modules.xml
ADDED
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/wonderdog.iml" filepath="$PROJECT_DIR$/.idea/wonderdog.iml" />
+    </modules>
+  </component>
+</project>
+
data/.idea/vcs.xml
ADDED
data/.idea/wonderdog.iml
ADDED
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="RUBY_MODULE" version="4">
+  <component name="FacetManager">
+    <facet type="gem" name="Gem">
+      <configuration>
+        <option name="GEM_APP_ROOT_PATH" value="$MODULE_DIR$" />
+        <option name="GEM_APP_TEST_PATH" value="$MODULE_DIR$/test" />
+        <option name="GEM_APP_LIB_PATH" value="$MODULE_DIR$/lib" />
+      </configuration>
+    </facet>
+  </component>
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="library" scope="PROVIDED" name="bundler (v1.3.5, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="configliere (v0.4.18, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="diff-lcs (v1.2.4, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="eventmachine (v1.0.3, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="forgery (v0.5.0, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="gorillib (v0.5.0, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="highline (v1.6.18, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="json (v1.7.7, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="log4r (v1.1.10, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="multi_json (v1.7.2, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="rake (v0.9.6, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="redcarpet (v2.2.2, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="rspec (v2.13.0, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="rspec-core (v2.13.1, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v2.13.0, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v2.13.1, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="uuidtools (v2.1.3, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="vayacondios-client (v0.1.12, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="wukong (v3.0.1, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="wukong-hadoop (v0.1.1, rbenv: 1.9.3-p392) [gem]" level="application" />
+    <orderEntry type="library" scope="PROVIDED" name="yard (v0.8.6.1, rbenv: 1.9.3-p392) [gem]" level="application" />
+  </component>
+</module>
+
data/Gemfile
CHANGED
data/bin/estool
CHANGED
@@ -23,6 +23,9 @@ Commands include:
   optimize         Optimizes the specified INDEX to (-s) number of segments
   snapshot         Snapshots the specified INDEX to the gateway
   segments         Returns the segment information. Requires ElasticSearch v
+  aliases          Returns a list of Index/Alias pairs
+  ialiases         Returns a list of Alias/Index pairs
+  count            The number of documents in an index
   mapping
   set_replication
   search
@@ -89,7 +92,7 @@ class ESTool
 
   def status() shell_response(File.join(options.index, "_status?")) ; end
 
-  def list() status["indices"].keys ; end
+  def list() status["indices"].keys.sort ; end
 
   def health() shell_response("_cluster/health?") ; end
 
@@ -111,6 +114,24 @@ class ESTool
   def segments() shell_response(File.join(options.index, "_segments")) ; end
 
   def mapping() shell_response(File.join(options.index, "_mapping")) ; end
+
+  def aliases() shell_response('_aliases?').sort.inject({}){ |hsh, (index, info)| hsh[index] = info['aliases'].keys ; hsh } ; end
+
+  def ialiases()
+    inverse = Hash.new{ |hsh, key| hsh[key] = [] }
+    aliases.each{ |idx, als| als.each{ |a| inverse[a] << idx } }
+    inverse
+  end
+
+  # estool status -r | ruby -rjson -e 'puts JSON.parse($stdin.read)["indices"]["item_count_legacy"]["docs"]'
+  # {"num_docs"=>187749, "max_doc"=>187749, "deleted_docs"=>0}
+  def count()
+    if options.index == '_all'
+      status['indices'].inject({}){ |hsh, (index, info)| hsh[index] = info['docs']['num_docs'] ; hsh }
+    else
+      { options.index => shell_response(File.join(options.index, '_count'))['count'] }
+    end
+  end
 
   # curl -s -XPUT http://host:port/index/_settings -d '{"index":{"number_of_replicas":num}}'
   def set_replication() { "error" => "method not yet implemented" }; end
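
Note: the new count and aliases subcommands wrap Elasticsearch's _count, _status, and _aliases endpoints through shell_response. A minimal standalone sketch of the same lookups; the host, port, and index values are placeholders, not anything taken from this diff:

    require 'json'
    host, port, index = 'localhost', 9200, 'flight_count_20130405'   # placeholder values
    # Per-index document count, as count() does for a single index:
    puts JSON.parse(`curl -s http://#{host}:#{port}/#{index}/_count`)['count']
    # Index => aliases map, as aliases() builds from the _aliases endpoint:
    aliases = JSON.parse(`curl -s http://#{host}:#{port}/_aliases`)
    puts aliases.inject({}){ |hsh, (idx, info)| hsh[idx] = info['aliases'].keys ; hsh }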
data/bin/squirrel.rb
ADDED
@@ -0,0 +1,108 @@
+#! /usr/bin/ruby
+
+require "configliere"
+require_relative "../squirrel/squirrel.rb"
+
+doc = <<DOC
+This is the uber script the arguements you give it decide what happens
+squirrel => Standard Query Ultracrepidate Iamatology Ruby Resource for Elasticsearch Labarum ##
+example commands:
+clear all caches
+ruby squirrel.rb cache --host=localhost --port=9200 --clear_all_cache=true
+run slow log queries
+ruby squirrel.rb replay --host=localhost --port=9200 --preference=true,_primary_first --execute_slow_queries=/var/log/elasticsearch/padraig.log
+get backup an index aka generate a dumpfile
+ruby squirrel.rb backup --host=localhost --port=9200 --output_dir="." --dump_index=flight_count_20130405 --batch_size=100 --dump_mapping=flight_count_20130405_mapping.json
+get the cardinality of a dumpfile(card_file)
+ruby squirrel.rb cardinality --host=localhost --port=9200 --output_dir="." --card_file=flight_count_20130405 --cardinality=cnt,metric
+restore an index from a dumpfile
+ruby squirrel.rb restore --host=localhost --port=9200 --output_dir="." --restore_file=flight_count_20130405.gz --restore_index=flight_count_20130405 --restore_mapping=flight_count_20130405_mapping.json --batch_size=100
+duplicate files in an index from a dumpfile(duplicate_file)
+ruby squirrel.rb duplicate --host=localhost --port=9200 --output_dir="." --duplicate_file=flight_count_20130405.gz --duplicate_index=eight_flight_count_20130405 --duplicate_mapping=flight_count_20130405_mapping.json --batch_size=100
+add warmer
+ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --new_warmers_name=polite_warmer --warmers_index=flight_count_20130408 --create_warmer='{"sort" : ["_state", "flight_id","metric", "tb_h", "feature", "seconds", "base_feature", "metric_feature", "cnt", "_score"],"query":{"match_all":{}}}'
+remove warmer
+ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --remove_warmer=polite_warmer --warmers_index=flight_count_20130408
+disable warmers
+ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --warmers=false --warmers_index=flight_count_20130405
+enable warmers
+ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --warmers=false --warmers_index=flight_count_20130405
+remove warmer
+ruby squirrel.rb warmer --host=localhost --port=9200 --output_dir="." --remove_warmer=polite_warmer --warmers_index=flight_count_20130405
+change index settings
+ruby squirrel.rb index_settings --host=localhost --port=9200 --output_dir="." --settings_index=flight_count_20130405 --es_index_settings=refresh_interval,refresh_interval --es_index_settings_values=-1,0
+DOC
+
+Settings.use :commandline
+Settings.use :commands
+Settings.description = doc
+Settings.define_command :backup, :description => "Create a dump gzip file of an index" do |cmd|
+  cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
+  cmd.define :dump_file, :default => nil, :description => 'The name of the dumpfile to use, default is nil'
+  cmd.define :dump_index, :default => nil, :description => 'Index to use, default is nil'
+  cmd.define :query, :default => nil, :description => 'Query to use in order to limit the data extracted from the index, default nil'
+  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
+  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
+  cmd.define :dump_mapping, :default => nil, :description => 'The file to put the json mapping in, defaults to nil'
+  cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
+end
+Settings.define_command :restore, :description => "Take the data from a dump gzip file and use it to populate an index" do |cmd|
+  cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
+  cmd.define :restore_file, :default => nil, :description => 'The name of the dumpfile to use, default is nil'
+  cmd.define :restore_index, :default => nil, :description => 'Index to use, default is nil'
+  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
+  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
+  cmd.define :restore_mapping, :default => nil, :description => 'The mapping file to use when restoring an index, defaults to nil'
+  cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
+end
+Settings.define_command :duplicate, :description => "Take the data from a dump gzip file and add it into an index allowing documents to be duplicated" do |cmd|
+  cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
+  cmd.define :duplicate_file, :default => nil, :description => 'The name of the dumpfile to use, default is nil'
+  cmd.define :duplicate_index, :default => nil, :description => 'Index to use, default is nil'
+  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
+  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
+  cmd.define :duplicated_mapping, :default => nil, :description => 'The mapping file to use when restoring an index, defaults to nil'
+  cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
+end
+Settings.define_command :cardinality, :description => "Count the number of unique valuse for the given field(s)" do |cmd|
+  cmd.define :output_dir, :default => nil, :description => 'Directory to put output, defaults to nil'
+  cmd.define :cardinality, :type => Array, :default => nil, :description => 'Return the cardinality of the given fields, defaults to nil'
+  cmd.define :card_file, :default => nil, :description => 'The dump file to grab info from when determining cardinality MUST NOT be compressed, defaults to nil'
+end
+Settings.define_command :warmer, :description => "Interact elasticsearch warmers" do |cmd|
+  cmd.define :warmers, :default => nil, :description => 'Use warmers expected values true/false, defaults to nil'
+  cmd.define :warmers_index, :default => nil, :description => 'The index to add the warmer too, remove it from or disable/enable it on, defaults to nil'
+  cmd.define :new_warmers_name, :default => nil, :description => 'Name of warmer to create, defaults to nil'
+  cmd.define :create_warmer, :default => nil, :description => 'Query to create warmer, defaults to nil'
+  cmd.define :remove_warmer, :default => nil, :description => 'Name of warmer to remove, defaults to nil'
+  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
+  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
+end
+Settings.define_command :cache, :description => "Interact with elasticsearch caches" do |cmd|
+  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
+  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
+  cmd.define :clear_all_cache, :type => :boolean, :default => nil, :description => 'Clear all caches expected true/false, defaults to nil'
+  cmd.define :clear_fielddata, :type => :boolean, :default => nil, :description => 'Clear filter cache expected true/false, defaults to nil'
+  cmd.define :clear_filter_cache, :type => :boolean, :default => nil, :description => 'Clear filter cache expected true/false, defaults to nil'
+end
+Settings.define_command :replay, :description => "Replay slow log queries" do |cmd|
+  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
+  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
+  cmd.define :execute_slow_queries, :default => nil, :description => 'Execute the slow log queries in the provided log file,ie --execute_slow_log=/var/log/elasticsearch/padraig.log, defaults to nil'
+  cmd.define :batch_size, :type => Integer, :default => nil, :description => 'The number of lines to process at once, defaults to nil'
+  cmd.define :preference, :type => Array, :default => [true,"_primary_first"], :description => 'Turn on/off preference and set it,ie --preference=true,_primary_first'
+  cmd.define :routing, :type => Array, :default => [false, ""], :description => 'Add routing, ie --routing=true,flight_id'
+end
+Settings.define_command :index_settings, :description => "Change the index settings" do |cmd|
+  cmd.define :host, :default => nil, :description => 'The host to connect to, defaults to nil'
+  cmd.define :port, :type => Integer, :default => nil, :description => 'The port to connect to on the host, defaults to nil'
+  cmd.define :settings_index, :default => nil, :description => 'The index that the settings listed in index_settings will be changed for, defaults to nil'
+  cmd.define :es_index_settings, :type => Array, :default => nil, :description => 'A comma deliminated list of elasticsearch index settings to be set for --settings_index, defaults to []'
+  cmd.define :es_index_settings_values, :type => Array, :default => nil, :description => 'A comma deliminated list of elasticsearch index settings values to be set for --settings_index, defaults to []'
+end
+Settings.resolve!
+
+
+Squirrel.new(Settings.command_name, Settings.to_hash).task_caller
+
+
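
Note: the script above relies on Configliere's command support. Each define_command registers a subcommand with its own flags, and after Settings.resolve! the chosen subcommand and its merged flags are available as Settings.command_name and Settings.to_hash, which is what gets handed to Squirrel.new(...).task_caller. A stripped-down sketch of that dispatch pattern; the two flags shown here are illustrative only, the real commands define many more, as above:

    require 'configliere'

    Settings.use :commandline
    Settings.use :commands
    Settings.define_command :cache, :description => "Interact with elasticsearch caches" do |cmd|
      cmd.define :host, :default => 'localhost', :description => 'The host to connect to'
      cmd.define :port, :type => Integer, :default => 9200, :description => 'The port to connect to on the host'
    end
    Settings.resolve!

    # e.g. `ruby this_script.rb cache --host=es1 --port=9200` yields:
    puts Settings.command_name   # the chosen subcommand
    puts Settings.to_hash        # the merged flag values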
data/lib/wonderdog.rb
CHANGED
@@ -26,6 +26,9 @@ module Wukong
     settings.define(:es_id_field, :description => "If this field is present in a record, make an update request, otherwise make a create request", :wukong_hadoop => true)
     settings.define(:es_bulk_size, :description => "Number of requests to batch locally before making a request to ElasticSearch", :type => Integer, :wukong_hadoop => true)
     settings.define(:es_query, :description => "Query to use when defining input splits for ElasticSearch input", :wukong_hadoop => true)
+    settings.define(:es_transport, :description => "Use a transport client to an existing node instead of spinning up a new node", :default => true, type: :boolean, :wukong_hadoop => true)
+    settings.define(:es_transport_host, :description => "Host of existing node for transport client", default: 'localhost', :wukong_hadoop => true)
+    settings.define(:es_transport_port, :description => "Port of existing node for transport client", default: 9300, type: Integer, :wukong_hadoop => true)
   end
 
   # Boot Wonderdog with the given `settings` in the given `dir`.
@@ -105,7 +105,10 @@ module Wukong
 
       super() + [].tap do |o|
         if (reads_from_elasticsearch? || writes_to_elasticsearch?)
-          o << java_opt('es.config',
+          o << java_opt('es.config', settings[:es_config])
+          o << java_opt('elasticsearch.transport', settings[:es_transport])
+          o << java_opt('elasticsearch.transport.host', settings[:es_transport_host]) if settings[:es_transport] && settings[:es_transport_host]
+          o << java_opt('elasticsearch.transport.port', settings[:es_transport_port]) if settings[:es_transport] && settings[:es_transport_port]
         end
 
         if reads_from_elasticsearch?
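
Note: the three new es_transport* settings are forwarded to the streaming job through java_opt. Assuming java_opt(key, value) renders a standard Hadoop "-D key=value" definition (an assumption; its implementation is not part of this diff), the logic above with the default values would produce options along these lines:

    # Sketch only; java_opt's exact output format is assumed, not shown in this diff.
    settings = { :es_transport => true, :es_transport_host => 'localhost', :es_transport_port => 9300 }
    opts = []
    opts << ['elasticsearch.transport',      settings[:es_transport]]
    opts << ['elasticsearch.transport.host', settings[:es_transport_host]] if settings[:es_transport] && settings[:es_transport_host]
    opts << ['elasticsearch.transport.port', settings[:es_transport_port]] if settings[:es_transport] && settings[:es_transport_port]
    opts.each { |key, value| puts "-D #{key}=#{value}" }
    # -D elasticsearch.transport=true
    # -D elasticsearch.transport.host=localhost
    # -D elasticsearch.transport.port=9300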
data/lib/wonderdog/version.rb
CHANGED
data/pom.xml
CHANGED
data/spec/spec_helper.rb
CHANGED
data/spec/wonderdog/hadoop_invocation_override_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Wukong::Elasticsearch::HadoopInvocationOverride do
   let(:es_complex) { hadoop_runner('regexp', 'count', input: 'es://the_index/the_map', output: 'es:///the_index/the_map', es_query: '{"hi": "there"}', es_request_size: 1000, es_index_field: 'ID', map_speculative: true, reduce_speculative: true) }
 
   context "passing necessary jars to Hadoop streaming" do
-    before { Dir.stub
+    before { Dir.stub(:[]).and_return(["/lib/dir/elasticsearch.jar"], ["/lib/dir/wonderdog.jar"]) }
     context "when not given explicit jars" do
       context "and not interacting with Elasticsearch" do
         it "doesn't add jars" do
data/squirrel/all_facets.rb
ADDED
@@ -0,0 +1,95 @@
+require 'configliere'
+require 'json'
+
+Settings.use :commandline
+Settings.use :config_block
+Settings.define :es_index, default: nil
+Settings.define :fields, default: nil
+Settings.define :one_index, default: false, flag: 'o'
+Settings.define :composite_key, default: nil
+Settings.define :es_type
+Settings.finally do |settings|
+  settings.es_index ||= settings.one_index ? [settings.es_type, "cnt"].join : 'ad_activity'
+end
+Settings.resolve!
+
+puts "using es index #{Settings.es_index} and type #{Settings.es_type}"
+
+index = Settings.es_index
+type = Settings.es_type
+
+response = `curl localhost:9200/#{Settings.es_index}/_mapping/`
+puts JSON.parse(response)["#{index}"]["#{type}"]#["properties"]
+fields = JSON.parse(response)["#{index}"]["#{type}"]["properties"].keys#.select{|x| x.end_with?("_id")}
+#fields += %w[metric feature]# browser_ua]
+#fields = JSON.parse(response)["#{type}_legacy"][type]["properties"].keys.select{|x| x.end_with?("_id")}
+
+puts "got fields #{Settings.fields || fields}"
+
+# site_count => site_composite
+# placement_count => pl_composite
+# flight_count => metric_feature
+
+composite_key = case Settings.es_type
+                when "site_count" then "site_composite"
+                when "placement_count" then "pl_composite"
+                when "flight_count" then "metric_feature"
+                else nil
+                end
+
+doc = {
+  "query"=> {
+    "match_all"=> {}
+  },
+  "facets"=> {
+#    "ignz"=> {
+#      "statistical"=> {
+#        "field" => "cnt"
+#      }
+#    },
+    "igna"=> {
+      "date_histogram"=> {
+        "field"=> "tb_h",
+        "interval" => "day"
+      }
+    },
+    "ignb"=> {
+      "terms"=> {
+        "fields"=> [Settings.fields || fields].flatten
+      }
+    },
+  }
+}
+
+fields.each do |field|
+  doc["facets"]["ign_#{field}"] = {
+    "terms_stats" => {
+      "key_field" => field,
+      "value_field" => "cnt"
+    }
+  }
+end
+
+if not composite_key.nil?
+  doc["facets"]["ignc"] = {
+    "terms_stats" => {
+      "key_field" => Settings.composite_key || composite_key,
+      "value_field" => "cnt"
+    }
+  }
+end
+
+puts "about to query with " + doc.to_s
+
+results = `curl localhost:9200/#{Settings.es_index}/#{Settings.es_type}/_search/?pretty=true -d '#{JSON.generate(doc)}'`
+term_hash = {}
+resultsLineArray = results.split( /\r?\n/ )
+copy_resultsLineArray = resultsLineArray - []
+copy_resultsLineArray.each_with_index do |line, index|
+  if line.include?("\"term\" : \"")
+    term_hash[line] = index
+  end
+end
+puts `curl localhost:9200/#{Settings.es_index}/#{Settings.es_type}/_search/?pretty=true -d '#{JSON.generate(doc)}'`
+puts "from command: \n curl localhost:9200/#{Settings.es_index}/#{Settings.es_type}/_search/ -d '#{JSON.generate(doc)}'"
+puts "number of terms: #{term_hash.keys.size()}"
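
Note: the script above counts terms by scanning the pretty-printed response for lines containing "term" : ". A sketch of the same tally done by parsing the response as JSON instead; this assumes the 0.90-era facet response shape, where terms and terms_stats facets carry a "terms" array, and other facet types such as date_histogram simply fall back to an empty list:

    facets = JSON.parse(results)['facets'] || {}
    facets.each do |name, facet|
      terms = facet['terms'] || []
      puts "#{name}: #{terms.size} terms"
    end
    puts "number of terms: #{facets.values.map{ |f| (f['terms'] || []).size }.reduce(0, :+)}"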
data/squirrel/change_es_index_settings.rb
ADDED
@@ -0,0 +1,19 @@
+class ChangeESIndexSettings
+  def initialize(options = {})
+    @host = options[:host]
+    @port = options[:port]
+    @index = options[:index]
+    @settings_and_values = options[:settings_and_values]
+  end
+
+  def change_setting(setting, value)
+    puts "changing setting #{setting} to value #{value}"
+    `curl -s -XPUT 'http://#{@host}:#{@port}/#{@index}/_settings?pretty=true' -d '{ "#{setting}":"#{value}" }'`
+  end
+
+  def run
+    @settings_and_values.each do |setting, value|
+      change_setting(setting, value)
+    end
+  end
+end
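
Usage note: a minimal sketch of driving ChangeESIndexSettings directly. The host, port, index, and setting values are placeholders, and the require path assumes the calling script sits next to this file:

    require_relative 'change_es_index_settings'

    ChangeESIndexSettings.new(
      :host => 'localhost',
      :port => 9200,
      :index => 'flight_count_20130405',
      :settings_and_values => { 'index.refresh_interval' => '-1' }   # setting => value pairs
    ).run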
data/squirrel/clear_es_caches.rb
ADDED
@@ -0,0 +1,30 @@
+class ClearESCaches
+  def initialize(options={})
+    @to_clear = options[:type]
+    @host = options[:host]
+    @port = options[:port]
+  end
+
+  def clear_all
+    `curl -s -XPOST 'http://#{@host}:#{@port}/_all/_cache/clear?field_data=true&filter=true&bloom=true' ; echo`
+  end
+
+  def clear_filter_cache
+    `curl -s -XPOST 'http://#{@host}:#{@port}/_all/_cache/clear?field_data=false&filter=true&bloom=true' ; echo`
+  end
+
+  def clear_fielddata
+    `curl -s -XPOST 'http://#{@host}:#{@port}/_all/_cache/clear?field_data=true&filter=false&bloom=true' ; echo`
+  end
+
+  def run
+    puts @to_clear
+    case command = @to_clear.to_sym
+    when :all then clear_all
+    when :filter then clear_filter_cache
+    when :fielddata then clear_fielddata
+    else abort "#{command} not recognized"
+    end
+  end
+
+end
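
Usage note: a minimal sketch of driving ClearESCaches directly. The :type value picks the branch of run ('all', 'filter', or 'fielddata'; anything else aborts), and host/port are placeholders:

    require_relative 'clear_es_caches'

    ClearESCaches.new(:type => 'filter', :host => 'localhost', :port => 9200).run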
|