embulk-executor-mapreduce 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5c531e6955469a01f0e2ed716a65fdf228ae95ba
4
- data.tar.gz: d8724a7abcaedd7549a397d2b14df6edb2832d52
3
+ metadata.gz: 5ac810376cbc4895b8745c82dcd642ad8302bf4f
4
+ data.tar.gz: 34b4f9a372be43b2915878b35954d75c25f7e8d8
5
5
  SHA512:
6
- metadata.gz: 9983809cd453596cf3fc683f75a22b43dbcca0abf202c7ad7bdc9cbb640c673e95be706f6d49ad04bb3881f8668bc9b8e3946115d54e2a14e90dfb163fbd9b0f
7
- data.tar.gz: 7b272911c78f9bac87fa867f0e3978a96f31c8fb348b150410a6991e1f5f425782450c9422ca4022e2cb15abc61037caaf080fb196f48bd0a441e3e81cc31b15
6
+ metadata.gz: 96fd33f8766a7510776f8253702c04291e675ea62c6ed05cca05958c905383770afc63d4056df1aaa8bac8be4d0175ba5dd4ada150d86a31b6375e2d1a7c222f
7
+ data.tar.gz: 01c46e09dd99360ce835ba419819da22d2a711b8a4cebf47382cb19092730e1608f76ae80bf79f498cc363fdbda0f0fd1fc4932a592e76f0b264b7b1e055d7a4
@@ -1,6 +1,7 @@
1
1
  package org.embulk.executor.mapreduce;
2
2
 
3
3
  import java.util.List;
4
+ import java.util.ArrayList;
4
5
  import java.util.Collection;
5
6
  import java.util.Set;
6
7
  import java.util.Map;
@@ -9,6 +10,7 @@ import java.util.HashMap;
9
10
  import java.io.File;
10
11
  import java.io.IOException;
11
12
  import java.io.EOFException;
13
+ import java.nio.file.FileSystems;
12
14
  import java.net.URI;
13
15
  import java.net.URISyntaxException;
14
16
  import java.net.URL;
@@ -239,7 +241,7 @@ public class MapReduceExecutor
239
241
  }
240
242
 
241
243
  // jar files
242
- Iterable<Path> jars = collectJars(task.getLibjars());
244
+ List<Path> jars = collectJars(task.getLibjars(), task.getExcludeJars());
243
245
  job.getConfiguration().set("tmpjars", StringUtils.join(",", jars));
244
246
 
245
247
  job.setInputFormatClass(EmbulkInputFormat.class);
@@ -304,7 +306,7 @@ public class MapReduceExecutor
304
306
  }
305
307
  }
306
308
 
307
- private static Iterable<Path> collectJars(List<String> extraJars)
309
+ private List<Path> collectJars(List<String> extraJars, List<String> excludeJars)
308
310
  {
309
311
  Set<Path> set = new HashSet<Path>();
310
312
 
@@ -325,7 +327,29 @@ public class MapReduceExecutor
325
327
  }
326
328
  }
327
329
 
328
- return set;
330
+ // validate jar files
331
+ List<Path> uses = new ArrayList<>(set.size());
332
+ for (Path path : set) {
333
+ String fileName = path.getName();
334
+ if (globMatchesWithAnyOf(excludeJars, fileName)) {
335
+ log.debug("Excluding jar '"+path+"'");
336
+ }
337
+ else {
338
+ uses.add(path);
339
+ }
340
+ }
341
+
342
+ return uses;
343
+ }
344
+
345
+ private static boolean globMatchesWithAnyOf(List<String> excludeJars, String fileName)
346
+ {
347
+ for (String pattern : excludeJars) {
348
+ if (FileSystems.getDefault().getPathMatcher("glob:"+pattern).matches(java.nio.file.Paths.get(fileName))) {
349
+ return true;
350
+ }
351
+ }
352
+ return false;
329
353
  }
330
354
 
331
355
  private static void collectURLClassLoaderJars(Set<Path> set, ClassLoader cl)
@@ -31,6 +31,10 @@ public interface MapReduceExecutorTask
31
31
  @ConfigDefault("[]")
32
32
  public List<String> getLibjars();
33
33
 
34
+ @Config("exclude_jars")
35
+ @ConfigDefault("[]")
36
+ public List<String> getExcludeJars();
37
+
34
38
  @Config("state_path")
35
39
  @ConfigDefault("\"/tmp/embulk\"")
36
40
  public String getStatePath();
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-executor-mapreduce
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-19 00:00:00.000000000 Z
11
+ date: 2015-10-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Executes tasks on Hadoop.
14
14
  email:
@@ -61,7 +61,7 @@ files:
61
61
  - classpath/curator-client-2.6.0.jar
62
62
  - classpath/curator-framework-2.6.0.jar
63
63
  - classpath/curator-recipes-2.6.0.jar
64
- - classpath/embulk-executor-mapreduce-0.2.0.jar
64
+ - classpath/embulk-executor-mapreduce-0.2.1.jar
65
65
  - classpath/gson-2.2.4.jar
66
66
  - classpath/hadoop-annotations-2.6.0.jar
67
67
  - classpath/hadoop-auth-2.6.0.jar