wonderdog 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md
CHANGED
@@ -1,7 +1,6 @@
 # Wonderdog
 
-Wonderdog is a Hadoop interface to Elastic Search. While it is specifically intended for use with Apache Pig, it does include all the necessary Hadoop input and output formats for Elastic Search. That is, it's possible to skip Pig
-tirely and write custom Hadoop jobs if you prefer.
+Wonderdog is a Hadoop interface to Elastic Search. While it is specifically intended for use with Apache Pig, it does include all the necessary Hadoop input and output formats for Elastic Search. That is, it's possible to skip Pig entirely and write custom Hadoop jobs if you prefer.
 
 ## Requirements
 
@@ -18,7 +17,7 @@ This allows you to store tabular data (eg. tsv, csv) into elasticsearch.
 ```pig
 %default ES_JAR_DIR '/usr/local/share/elasticsearch/lib'
 %default INDEX 'ufo_sightings'
-%default OBJ 'sighting'
+%default OBJ 'sighting'
 
 register target/wonderdog*.jar;
 register $ES_JAR_DIR/*.jar;
@@ -101,7 +100,7 @@ bin/estool refresh --index users
 You'll definitely want to do this after the bulk load finishes so you don't lose any data in case of cluster failure:
 
 ```
-bin/estool snapshot --index users
+bin/estool snapshot --index users
 ```
 
 * Bump the replicas for the index up to at least one.
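For reference (an illustrative sketch, not content from the package): the refresh, snapshot, and replica-bump steps described above chain together naturally after a bulk load. A minimal sequence, assuming the `users` index from the snippet, a node on localhost:9200 for the settings call, and estool invoked exactly as shown elsewhere in this README:

```
# Post-bulk-load sequence for the users index (sketch):
# flush recent writes, snapshot to the gateway, then bring replicas back up.
bin/estool refresh --index users
bin/estool snapshot --index users
# one replica = two copies of each shard in total
curl -XPUT 'http://localhost:9200/users/_settings' -d '{"index": {"number_of_replicas": 1}}'
```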
@@ -165,7 +164,7 @@ bin/estool optimize -c <elasticsearch_host> --index <index_name>
 * Snapshot an index
 
 ```
-bin/estool snapshot -c <elasticsearch_host> --index <index_name>
+bin/estool snapshot -c <elasticsearch_host> --index <index_name>
 ```
 
 * Delete an index
@@ -173,3 +172,118 @@ bin/estool snapshot -c <elasticsearch_host> --index <index_name>
 ```
 bin/estool delete -c <elasticsearch_host> --index <index_name>
 ```
+
+
+## Bulk Loading Tips for the Risk-seeking Dangermouse
+
+The file examples/bulkload_pageviews.pig shows an example of bulk loading elasticsearch, including preparing the index.
+
+### Elasticsearch Setup
+
+Some tips for an industrial-strength cluster, assuming exclusive use of machines and no read load during the job:
+
+* use multiple machines with a fair bit of ram (7+GB). Heap doesn't help too much for loading though, so you don't have to go nuts: we do fine with amazon m1.large's.
+* Allocate a sizeable heap, setting min and max equal, and
+  - turn `bootstrap.mlockall` on, and run `ulimit -l unlimited`.
+  - For example, for a 3GB heap: `-Xmx3000m -Xms3000m -Delasticsearch.bootstrap.mlockall=true`
+  - Never use a heap above 12GB or so, it's dangerous (STW compaction timeouts).
+  - You've succeeded if the full heap size is resident on startup: that is, in htop both the VMEM and RSS are 3000 MB or so.
+* temporarily increase the `index_buffer_size`, to say 40%.
+
+### Further reading
+
+* [Elasticsearch JVM Settings, explained](http://jprante.github.com/2012/11/28/Elasticsearch-Java-Virtual-Machine-settings-explained.html)
+
+### Example of creating an index and mapping
+
+Index:
+
+curl -XPUT 'http://localhost:9200/pageviews' -d '{"settings": {
+"index": { "number_of_shards": 12, "store.compress": { "stored": true, "tv": true } } }}'
+
+$ curl -XPUT 'http://localhost:9200/ufo_sightings/_settings?pretty=true' -d '{"settings": {
+"index": { "number_of_shards": 12, "store.compress": { "stored": true, "tv": true } } }}'
+
+Mapping (elasticsearch "type"):
+
+# Wikipedia Pageviews
+curl -XPUT 'http://localhost:9200/pageviews/pagehour/_mapping' -d '{
+"pagehour": { "_source": { "enabled" : true }, "properties" : {
+"page_id" : { "type": "long", "store": "yes" },
+"namespace": { "type": "integer", "store": "yes" },
+"title": { "type": "string", "store": "yes" },
+"num_visitors": { "type": "long", "store": "yes" },
+"date": { "type": "integer", "store": "yes" },
+"time": { "type": "long", "store": "yes" },
+"ts": { "type": "date", "store": "yes" },
+"day_of_week": { "type": "integer", "store": "yes" } } }}'
+
+$ curl -XPUT 'http://localhost:9200/ufo_sightings/sighting/_mapping' -d '{ "sighting": {
+"_source": { "enabled" : true },
+"properties" : {
+"sighted_at": { "type": "date", "store": "yes" },
+"reported_at": { "type": "date", "store": "yes" },
+"shape": { "type": "string", "store": "yes" },
+"duration": { "type": "string", "store": "yes" },
+"description": { "type": "string", "store": "yes" },
+"coordinates": { "type": "geo_point", "store": "yes" },
+"location_str": { "type": "string", "store": "no" },
+"location": { "type": "object", "dynamic": false, "properties": {
+"place_id": { "type": "string", "store": "yes" },
+"place_type": { "type": "string", "store": "yes" },
+"city": { "type": "string", "store": "yes" },
+"county": { "type": "string", "store": "yes" },
+"state": { "type": "string", "store": "yes" },
+"country": { "type": "string", "store": "yes" } } }
+} } }'
+
+
+### Temporary Bulk-load settings for an index
+
+To prepare a database for bulk loading, the following settings may help. They are
+*EXTREMELY* aggressive, and include knocking the replication factor back to 1 (zero replicas). One
+false step and you've destroyed Tokyo.
+
+Actually, you know what? Never mind. Don't apply these, they're too crazy.
+
+curl -XPUT 'http://localhost:9200/pageviews/_settings?pretty=true' -d '{"index": {
+"number_of_replicas": 0, "refresh_interval": -1, "gateway.snapshot_interval": -1,
+"translog": { "flush_threshold_ops": 50000, "flush_threshold_size": "200mb", "flush_threshold_period": "300s" },
+"merge.policy": { "max_merge_at_once": 30, "segments_per_tier": 30, "floor_segment": "10mb" },
+"store.compress": { "stored": true, "tv": true } } }'
+
+To restore your settings, in case you didn't destroy Tokyo:
+
+curl -XPUT 'http://localhost:9200/pageviews/_settings?pretty=true' -d ' {"index": {
+"number_of_replicas": 2, "refresh_interval": "60s", "gateway.snapshot_interval": "3600s",
+"translog": { "flush_threshold_ops": 5000, "flush_threshold_size": "200mb", "flush_threshold_period": "300s" },
+"merge.policy": { "max_merge_at_once": 10, "segments_per_tier": 10, "floor_segment": "10mb" },
+"store.compress": { "stored": true, "tv": true } } }'
+
+If you did destroy your database, please send your resume to jobs@infochimps.com as you begin your
+job hunt. It's the reformed sinner that makes the best missionary.
+
+
+### Post-bulkrun maintenance
+
+es_index=pageviews ; ( for foo in _flush _refresh '_optimize?max_num_segments=6&refresh=true&flush=true&wait_for_merge=true' '_gateway/snapshot' ; do echo "======= $foo" ; time curl -XPOST "http://localhost:9200/$es_index/$foo" ; done ) &
+
+### Full dump of cluster health
+
+es_index=pageviews ; es_node="projectes-elasticsearch-4"
+curl -XGET "http://localhost:9200/$es_index/_status?pretty=true"
+curl -XGET "http://localhost:9200/_cluster/state?pretty=true"
+curl -XGET "http://localhost:9200/$es_index/_stats?pretty=true&merge=true&refresh=true&flush=true&warmer=true"
+curl -XGET "http://localhost:9200/_cluster/nodes/$es_node/stats?pretty=true&all=true"
+curl -XGET "http://localhost:9200/_cluster/nodes/$es_node?pretty=true&all=true"
+curl -XGET "http://localhost:9200/_cluster/health?pretty=true"
+curl -XGET "http://localhost:9200/$es_index/_search?pretty=true&limit=3"
+curl -XGET "http://localhost:9200/$es_index/_segments?pretty=true" | head -n 200
+
+### Decommission nodes
+
+Run this, excluding the decommissionable nodes from the list:
+
+curl -XPUT http://localhost:9200/pageviews/_settings -d '{
+"index.routing.allocation.include.ironfan_name" :
+"projectes-elasticsearch-0,projectes-elasticsearch-1,projectes-elasticsearch-2" }'
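As an aside (a sketch, not content from the package): the two `_settings` calls in the "Temporary Bulk-load settings" subsection above are easiest to keep as a matched pair. Something like the following shell wrapper would do; the script name, the `ES_HOST` variable, and the argument handling are invented for illustration, while the setting values are the ones quoted above:

```
#!/usr/bin/env bash
# bulk_settings.sh (hypothetical): flip an index between bulk-load and normal settings.
ES_HOST="${ES_HOST:-http://localhost:9200}"
MODE="$1" ; INDEX="${2:-pageviews}"

case "$MODE" in
  bulk)     # the aggressive settings quoted above: no replicas, no refresh, no gateway snapshots
    curl -XPUT "$ES_HOST/$INDEX/_settings?pretty=true" -d '{"index": {
      "number_of_replicas": 0, "refresh_interval": -1, "gateway.snapshot_interval": -1 }}'
    ;;
  restore)  # the restore settings quoted above
    curl -XPUT "$ES_HOST/$INDEX/_settings?pretty=true" -d '{"index": {
      "number_of_replicas": 2, "refresh_interval": "60s", "gateway.snapshot_interval": "3600s" }}'
    ;;
  *) echo "usage: $0 {bulk|restore} [index]" >&2 ; exit 1 ;;
esac
```

Run it as `./bulk_settings.sh bulk pageviews` before the job and `./bulk_settings.sh restore pageviews` once the load and the post-bulkrun maintenance have finished.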
data/examples/bulkload_wp_pageviews.pig
ADDED
@@ -0,0 +1,70 @@
+SET mapred.map.tasks.speculative.execution false;
+
+-- path to wikipedia pageviews data
+%default PAGEVIEWS 's3n://bigdata.chimpy.us/data/results/wikipedia/full/pageviews/2008/03'
+-- the target elasticsearch index and mapping ("type"). Will be created, though you
+-- should do it yourself first instead as shown below.
+%default INDEX 'pageviews'
+%default OBJ 'pagehour'
+-- path to elasticsearch jars
+%default ES_JAR_DIR '/usr/local/share/elasticsearch/lib'
+-- Batch size for loading
+%default BATCHSIZE '10000'
+
+-- Example of bulk loading. This will easily load more than a billion documents
+-- into a large cluster. We recommend using Ironfan to set your junk up.
+--
+-- Preparation:
+--
+-- Create the index:
+--
+-- curl -XPUT 'http://projectes-elasticsearch-0.test.chimpy.us:9200/pageviews' -d '{"settings": { "index": {
+-- "number_of_shards": 12, "number_of_replicas": 0, "store.compress": { "stored": true, "tv": true } } }}'
+--
+-- Define the elasticsearch mapping (type):
+--
+-- curl -XPUT 'http://projectes-elasticsearch-0.test.chimpy.us:9200/pageviews/pagehour/_mapping' -d '{
+-- "pagehour": {
+-- "_source": { "enabled" : true },
+-- "properties" : {
+-- "page_id" : { "type": "long", "store": "yes" },
+-- "namespace": { "type": "integer", "store": "yes" },
+-- "title": { "type": "string", "store": "yes" },
+-- "num_visitors": { "type": "long", "store": "yes" },
+-- "date": { "type": "integer", "store": "yes" },
+-- "time": { "type": "long", "store": "yes" },
+-- "ts": { "type": "date", "store": "yes" },
+-- "day_of_week": { "type": "integer", "store": "yes" } } }}'
+--
+-- For best results, see the 'Tips for Bulk Loading' in the README.
+--
+
+-- Always disable speculative execution when loading into a database
+set mapred.map.tasks.speculative.execution false
+-- Don't re-use JVM: logging gets angry
+set mapred.job.reuse.jvm.num.tasks 1
+-- Use large file sizes; setup/teardown time for leaving the cluster is worse
+-- than non-local map tasks
+set mapred.min.split.size 3000MB
+set pig.maxCombinedSplitSize 2000MB
+set pig.splitCombination true
+
+register ./target/wonderdog*.jar;
+register $ES_JAR_DIR/*.jar;
+
+pageviews = LOAD '$PAGEVIEWS' AS (
+page_id:long, namespace:int, title:chararray,
+num_visitors:long, date:int, time:long,
+epoch_time:long, day_of_week:int);
+pageviews_fixed = FOREACH pageviews GENERATE
+page_id, namespace, title,
+num_visitors, date, time,
+epoch_time * 1000L AS ts, day_of_week;
+
+STORE pageviews_fixed INTO 'es://$INDEX/$OBJ?json=false&size=$BATCHSIZE' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage();
+
+-- -- To instead dump the JSON data to disk (needs Pig 0.10+)
+-- set dfs.replication 2
+-- %default OUTDUMP '$PAGEVIEWS.json'
+-- rmf $OUTDUMP
+-- STORE pageviews_fixed INTO '$OUTDUMP' USING JsonStorage();
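A possible invocation of this script (an illustrative sketch, not part of the package; the path is the one listed in the gemspec, and the parameter values simply restate the script's own `%default`s, so each `-param` is optional):

```
# Launch the bulk-load example, overriding the defaults explicitly.
pig \
  -param ES_JAR_DIR=/usr/local/share/elasticsearch/lib \
  -param INDEX=pageviews \
  -param OBJ=pagehour \
  -param BATCHSIZE=10000 \
  examples/bulkload_wp_pageviews.pig
```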
data/lib/wonderdog/version.rb
CHANGED
data/src/main/java/com/infochimps/elasticsearch/ElasticSearchOutputFormat.java
CHANGED
@@ -40,13 +40,13 @@ import org.elasticsearch.ExceptionsHelper;
 import com.infochimps.elasticsearch.hadoop.util.HadoopUtils;
 
 /**
-
+
 Hadoop OutputFormat for writing arbitrary MapWritables (essentially HashMaps) into Elasticsearch. Records are batched up and sent
 in a one-hop manner to the elastic search data nodes that will index them.
-
+
 */
 public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWritable> implements Configurable {
-
+
 static Log LOG = LogFactory.getLog(ElasticSearchOutputFormat.class);
 private Configuration conf = null;
 
@@ -60,12 +60,13 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 private String idFieldName;
 private String objType;
 private String[] fieldNames;
-
+
 // Used for bookkeeping purposes
 private AtomicLong totalBulkTime = new AtomicLong();
 private AtomicLong totalBulkItems = new AtomicLong();
-private Random randgen = new Random();
+private Random randgen = new Random();
 private long runStartTime = System.currentTimeMillis();
+private long lastLogTime = 0;
 
 // For hadoop configuration
 private static final String ES_CONFIG_NAME = "elasticsearch.yml";
@@ -82,7 +83,7 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 private static final String COMMA = ",";
 private static final String SLASH = "/";
 private static final String NO_ID_FIELD = "-1";
-
+
 private volatile BulkRequestBuilder currentRequest;
 
 /**
@@ -104,7 +105,7 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 <li><b>elasticsearch.id.field.name</b> - When <b>elasticsearch.is_json</b> is true, this is the name of a field in the json document that contains the document's id. If -1 is used then the document is assumed to have no id and one is assigned to it by elasticsearch.</li>
 <li><b>elasticsearch.field.names</b> - When <b>elasticsearch.is_json</b> is false, this is a comma separated list of field names.</li>
 <li><b>elasticsearch.id.field</b> - When <b>elasticsearch.is_json</b> is false, this is the numeric index of the field to use as the document id. If -1 is used the document is assumed to have no id and one is assigned to it by elasticsearch.</li>
-</ul>
+</ul>
 */
 public ElasticSearchRecordWriter(TaskAttemptContext context) {
 Configuration conf = context.getConfiguration();
@@ -118,7 +119,7 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 LOG.info("Using field:["+idFieldName+"] for document ids");
 }
 this.objType = conf.get(ES_OBJECT_TYPE);
-
+
 //
 // Fetches elasticsearch.yml and the plugins directory from the distributed cache, or
 // from the local config.
@@ -134,7 +135,7 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 System.setProperty(ES_CONFIG,conf.get(ES_CONFIG));
 System.setProperty(ES_PLUGINS,conf.get(ES_PLUGINS));
 }
-
+
 start_embedded_client();
 initialize_index(indexName);
 currentRequest = client.prepareBulk();
@@ -144,7 +145,7 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 Closes the connection to elasticsearch. Any documents remaining in the bulkRequest object are indexed.
 */
 public void close(TaskAttemptContext context) throws IOException {
-if (currentRequest.numberOfActions() > 0) {
+if (currentRequest.numberOfActions() > 0) {
 try {
 BulkResponse response = currentRequest.execute().actionGet();
 } catch (Exception e) {
@@ -175,7 +176,7 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 try {
 Text mapKey = new Text(idFieldName);
 String record_id = fields.get(mapKey).toString();
-currentRequest.add(Requests.indexRequest(indexName).id(record_id).type(objType).create(false).source(builder));
+currentRequest.add(Requests.indexRequest(indexName).id(record_id).type(objType).create(false).source(builder));
 } catch (Exception e) {
 LOG.warn("Encountered malformed record");
 }
@@ -198,14 +199,14 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 } else if (value instanceof FloatWritable) {
 builder.value(((FloatWritable)value).get());
 } else if (value instanceof BooleanWritable) {
-builder.value(((BooleanWritable)value).get());
+builder.value(((BooleanWritable)value).get());
 } else if (value instanceof MapWritable) {
 builder.startObject();
 for (Map.Entry<Writable,Writable> entry : ((MapWritable)value).entrySet()) {
 if (!(entry.getValue() instanceof NullWritable)) {
 builder.field(entry.getKey().toString());
 buildContent(builder, entry.getValue());
-}
+}
 }
 builder.endObject();
 } else if (value instanceof ArrayWritable) {
@@ -215,7 +216,7 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 buildContent(builder, arrayOfThings[i]);
 }
 builder.endArray();
-}
+}
 }
 
 /**
@@ -224,12 +225,21 @@ public class ElasticSearchOutputFormat extends OutputFormat<NullWritable, MapWri
 private void processBulkIfNeeded() {
 totalBulkItems.incrementAndGet();
 if (currentRequest.numberOfActions() >= bulkSize) {
-
+boolean loggable = (System.currentTimeMillis() - lastLogTime >= 10000);
+
+try {
 long startTime = System.currentTimeMillis();
+if (loggable){ LOG.info("Sending [" + (currentRequest.numberOfActions()) + "]items"); }
 BulkResponse response = currentRequest.execute().actionGet();
 totalBulkTime.addAndGet(System.currentTimeMillis() - startTime);
-if (
-
+if (loggable) {
+LOG.info("Indexed [" + (currentRequest.numberOfActions()) + "]items " +
+"in [" + ((System.currentTimeMillis() - startTime)/1000) + "]s; " +
+"avg [" + (float)(1000.0*totalBulkItems.get())/(System.currentTimeMillis() - runStartTime) + "]rec/s" +
+"(total [" + totalBulkItems.get() + "]items " +
+"indexed in [" + (totalBulkTime.get()/1000) + "]s, " +
+"wall clock [" + ((System.currentTimeMillis() - runStartTime)/1000) + "]s)");
+lastLogTime = System.currentTimeMillis();
 }
 } catch (Exception e) {
 LOG.warn("Bulk request failed: " + e.getMessage());
data/wonderdog.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: wonderdog
 version: !ruby/object:Gem::Version
-version: 0.1.
+version: 0.1.1
 prerelease:
 platform: ruby
 authors:
@@ -13,7 +13,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-03-07 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 name: wukong-hadoop
@@ -22,7 +22,7 @@ dependencies:
 requirements:
 - - '='
 - !ruby/object:Gem::Version
-version: 0.1.
+version: 0.1.1
 type: :runtime
 prerelease: false
 version_requirements: !ruby/object:Gem::Requirement
@@ -30,7 +30,7 @@ dependencies:
 requirements:
 - - '='
 - !ruby/object:Gem::Version
-version: 0.1.
+version: 0.1.1
 description: ! " Wonderdog provides code in both Ruby and Java to make Elasticsearch\n
 \ a more fully-fledged member of both the Hadoop and Wukong\n ecosystems.\n\n For
 the Java side, Wonderdog provides InputFormat and OutputFormat\n classes for use
@@ -59,6 +59,7 @@ files:
 - config/more_settings.yml
 - config/run_elasticsearch-2.sh
 - config/ufo_config.json
+- examples/bulkload_wp_pageviews.pig
 - examples/no_wonderdog.rb
 - examples/wonderdog.rb
 - lib/wonderdog.rb
@@ -113,15 +114,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
 - - ! '>='
 - !ruby/object:Gem::Version
 version: '0'
+segments:
+- 0
+hash: -2901634710812664464
 required_rubygems_version: !ruby/object:Gem::Requirement
 none: false
 requirements:
 - - ! '>='
 - !ruby/object:Gem::Version
 version: '0'
+segments:
+- 0
+hash: -2901634710812664464
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.
+rubygems_version: 1.8.24
 signing_key:
 specification_version: 3
 summary: Make Hadoop and ElasticSearch play together nicely.