embulk-executor-mapreduce 0.2.2 → 0.2.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. checksums.yaml +4 -4
  2. data/classpath/{embulk-executor-mapreduce-0.2.2.jar → embulk-executor-mapreduce-0.2.3.jar} +0 -0
  3. data/src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java +48 -24
  4. data/src/main/java/org/embulk/executor/mapreduce/TimestampPartitioning.java +11 -6
  5. data/src/test/java/org/embulk/executor/mapreduce/MapReduceExecutorTestRuntime.java +130 -0
  6. data/src/test/java/org/embulk/executor/mapreduce/TestAttemptState.java +58 -0
  7. data/src/test/java/org/embulk/executor/mapreduce/TestEmbulkInputFormat.java +54 -0
  8. data/src/test/java/org/embulk/executor/mapreduce/TestEmbulkInputSplit.java +46 -0
  9. data/src/test/java/org/embulk/executor/mapreduce/TestEmbulkRecordReader.java +25 -0
  10. data/src/test/java/org/embulk/executor/mapreduce/TestMapReduceExecutor.java +251 -0
  11. data/src/test/java/org/embulk/executor/mapreduce/TestPageBufferWritable.java +84 -0
  12. data/src/test/java/org/embulk/executor/mapreduce/TestTimestampPartitioning.java +222 -0
  13. data/src/test/resources/config/core-site.xml +8 -0
  14. data/src/test/resources/config/embulk_mapred_config.yml +38 -0
  15. data/src/test/resources/config/embulk_mapred_invalid_config_files_config.yml +38 -0
  16. data/src/test/resources/config/embulk_mapred_invalid_libjars_config.yml +40 -0
  17. data/src/test/resources/config/embulk_mapred_invalid_partitioning_config.yml +40 -0
  18. data/src/test/resources/config/embulk_mapred_invalid_reducers_config.yml +44 -0
  19. data/src/test/resources/config/embulk_mapred_partitioning_config.yml +43 -0
  20. data/src/test/resources/config/embulk_mapred_stop_on_invalid_record_config.yml +39 -0
  21. data/src/test/resources/config/hdfs-site.xml +18 -0
  22. data/src/test/resources/config/mapred-site.xml +8 -0
  23. data/src/test/resources/fixtures/csv/sample1.csv +3 -0
  24. data/src/test/resources/fixtures/csv/sample2.csv +4 -0
  25. data/src/test/resources/fixtures/invalid_csv/sample1.csv +4 -0
  26. data/src/test/resources/fixtures/invalid_csv/sample2.csv +3 -0
  27. metadata +25 -3
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
+ <configuration>
4
+ <property>
5
+ <name>fs.defaultFS</name>
6
+ <value>file:///tmp/</value>
7
+ </property>
8
+ </configuration>
@@ -0,0 +1,38 @@
1
+ exec:
2
+ type: mapreduce
3
+ config_files:
4
+ - src/test/resources/config/core-site.xml
5
+ - src/test/resources/config/hdfs-site.xml
6
+ - src/test/resources/config/mapred-site.xml
7
+ config:
8
+ k1: v1
9
+ k2: v2
10
+ state_path: 'file:///tmp/embulk/'
11
+ job_name: embulk_mapred_0001
12
+ exclude_jars:
13
+ - '*log4j-over-slf4j*'
14
+ in:
15
+ type: file
16
+ path_prefix: src/test/resources/fixtures/csv/sample
17
+ parser:
18
+ charset: UTF-8
19
+ newline: CRLF
20
+ type: csv
21
+ delimiter: ','
22
+ quote: ''
23
+ escape: ''
24
+ skip_header_lines: 1
25
+ columns:
26
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
27
+ - {name: host, type: string}
28
+ - {name: path, type: string}
29
+ - {name: method, type: string}
30
+ - {name: referer, type: string}
31
+ - {name: code, type: long}
32
+ - {name: agent, type: string}
33
+ - {name: user, type: string}
34
+ - {name: size, type: long}
35
+ - {name: d, type: double}
36
+ - {name: flag, type: boolean}
37
+ out:
38
+ type: stdout
@@ -0,0 +1,38 @@
1
+ exec:
2
+ type: mapreduce
3
+ config_files:
4
+ - src/test/resources/config/invalid-core-site.xml
5
+ - src/test/resources/config/invalid-hdfs-site.xml
6
+ - src/test/resources/config/invalid-mapred-site.xml
7
+ config:
8
+ k1: v1
9
+ k2: v2
10
+ state_path: 'file:///tmp/embulk/'
11
+ job_name: embulk_mapred_0001
12
+ exclude_jars:
13
+ - '*log4j-over-slf4j*'
14
+ in:
15
+ type: file
16
+ path_prefix: src/test/resources/fixtures/csv/sample
17
+ parser:
18
+ charset: UTF-8
19
+ newline: CRLF
20
+ type: csv
21
+ delimiter: ','
22
+ quote: ''
23
+ escape: ''
24
+ skip_header_lines: 1
25
+ columns:
26
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
27
+ - {name: host, type: string}
28
+ - {name: path, type: string}
29
+ - {name: method, type: string}
30
+ - {name: referer, type: string}
31
+ - {name: code, type: long}
32
+ - {name: agent, type: string}
33
+ - {name: user, type: string}
34
+ - {name: size, type: long}
35
+ - {name: d, type: double}
36
+ - {name: flag, type: boolean}
37
+ out:
38
+ type: stdout
@@ -0,0 +1,40 @@
1
+ exec:
2
+ type: mapreduce
3
+ config_files:
4
+ - src/test/resources/config/core-site.xml
5
+ - src/test/resources/config/hdfs-site.xml
6
+ - src/test/resources/config/mapred-site.xml
7
+ config:
8
+ k1: v1
9
+ k2: v2
10
+ state_path: 'file:///tmp/embulk/'
11
+ job_name: embulk_mapred_0001
12
+ libjars:
13
+ - invalid_jar
14
+ exclude_jars:
15
+ - '*log4j-over-slf4j*'
16
+ in:
17
+ type: file
18
+ path_prefix: src/test/resources/fixtures/csv/sample
19
+ parser:
20
+ charset: UTF-8
21
+ newline: CRLF
22
+ type: csv
23
+ delimiter: ','
24
+ quote: ''
25
+ escape: ''
26
+ skip_header_lines: 1
27
+ columns:
28
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
29
+ - {name: host, type: string}
30
+ - {name: path, type: string}
31
+ - {name: method, type: string}
32
+ - {name: referer, type: string}
33
+ - {name: code, type: long}
34
+ - {name: agent, type: string}
35
+ - {name: user, type: string}
36
+ - {name: size, type: long}
37
+ - {name: d, type: double}
38
+ - {name: flag, type: boolean}
39
+ out:
40
+ type: stdout
@@ -0,0 +1,40 @@
1
+ exec:
2
+ type: mapreduce
3
+ config_files:
4
+ - src/test/resources/config/core-site.xml
5
+ - src/test/resources/config/hdfs-site.xml
6
+ - src/test/resources/config/mapred-site.xml
7
+ config:
8
+ k1: v1
9
+ k2: v2
10
+ partitioning:
11
+ type: long
12
+ state_path: 'file:///tmp/embulk/'
13
+ job_name: embulk_mapred_partitioning_0001
14
+ exclude_jars:
15
+ - '*log4j-over-slf4j*'
16
+ in:
17
+ type: file
18
+ path_prefix: src/test/resources/fixtures/csv/sample
19
+ parser:
20
+ charset: UTF-8
21
+ newline: CRLF
22
+ type: csv
23
+ delimiter: ','
24
+ quote: ''
25
+ escape: ''
26
+ skip_header_lines: 1
27
+ columns:
28
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
29
+ - {name: host, type: string}
30
+ - {name: path, type: string}
31
+ - {name: method, type: string}
32
+ - {name: referer, type: string}
33
+ - {name: code, type: long}
34
+ - {name: agent, type: string}
35
+ - {name: user, type: string}
36
+ - {name: size, type: long}
37
+ - {name: d, type: double}
38
+ - {name: flag, type: boolean}
39
+ out:
40
+ type: stdout
@@ -0,0 +1,44 @@
1
+ exec:
2
+ type: mapreduce
3
+ config_files:
4
+ - src/test/resources/config/core-site.xml
5
+ - src/test/resources/config/hdfs-site.xml
6
+ - src/test/resources/config/mapred-site.xml
7
+ config:
8
+ k1: v1
9
+ k2: v2
10
+ partitioning:
11
+ type: timestamp
12
+ unit: hour
13
+ column: timestamp
14
+ unix_timestamp_unit: sec
15
+ reducers: -1
16
+ state_path: 'file:///tmp/embulk/'
17
+ job_name: embulk_mapred_0001
18
+ exclude_jars:
19
+ - '*log4j-over-slf4j*'
20
+ in:
21
+ type: file
22
+ path_prefix: src/test/resources/fixtures/csv/sample
23
+ parser:
24
+ charset: UTF-8
25
+ newline: CRLF
26
+ type: csv
27
+ delimiter: ','
28
+ quote: ''
29
+ escape: ''
30
+ skip_header_lines: 1
31
+ columns:
32
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
33
+ - {name: host, type: string}
34
+ - {name: path, type: string}
35
+ - {name: method, type: string}
36
+ - {name: referer, type: string}
37
+ - {name: code, type: long}
38
+ - {name: agent, type: string}
39
+ - {name: user, type: string}
40
+ - {name: size, type: long}
41
+ - {name: d, type: double}
42
+ - {name: flag, type: boolean}
43
+ out:
44
+ type: stdout
@@ -0,0 +1,43 @@
1
+ exec:
2
+ type: mapreduce
3
+ config_files:
4
+ - src/test/resources/config/core-site.xml
5
+ - src/test/resources/config/hdfs-site.xml
6
+ - src/test/resources/config/mapred-site.xml
7
+ config:
8
+ k1: v1
9
+ k2: v2
10
+ partitioning:
11
+ type: timestamp
12
+ unit: hour
13
+ column: timestamp
14
+ unix_timestamp_unit: sec
15
+ state_path: 'file:///tmp/embulk/'
16
+ job_name: embulk_mapred_partitioning_0001
17
+ exclude_jars:
18
+ - '*log4j-over-slf4j*'
19
+ in:
20
+ type: file
21
+ path_prefix: src/test/resources/fixtures/csv/sample
22
+ parser:
23
+ charset: UTF-8
24
+ newline: CRLF
25
+ type: csv
26
+ delimiter: ','
27
+ quote: ''
28
+ escape: ''
29
+ skip_header_lines: 1
30
+ columns:
31
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
32
+ - {name: host, type: string}
33
+ - {name: path, type: string}
34
+ - {name: method, type: string}
35
+ - {name: referer, type: string}
36
+ - {name: code, type: long}
37
+ - {name: agent, type: string}
38
+ - {name: user, type: string}
39
+ - {name: size, type: long}
40
+ - {name: d, type: double}
41
+ - {name: flag, type: boolean}
42
+ out:
43
+ type: stdout
@@ -0,0 +1,39 @@
1
+ exec:
2
+ type: mapreduce
3
+ config_files:
4
+ - src/test/resources/config/core-site.xml
5
+ - src/test/resources/config/hdfs-site.xml
6
+ - src/test/resources/config/mapred-site.xml
7
+ config:
8
+ k1: v1
9
+ k2: v2
10
+ state_path: 'file:///tmp/embulk/'
11
+ job_name: embulk_mapred_0001
12
+ exclude_jars:
13
+ - '*log4j-over-slf4j*'
14
+ in:
15
+ type: file
16
+ path_prefix: src/test/resources/fixtures/invalid_csv/sample
17
+ parser:
18
+ charset: UTF-8
19
+ newline: CRLF
20
+ type: csv
21
+ delimiter: ','
22
+ quote: ''
23
+ escape: ''
24
+ skip_header_lines: 1
25
+ stop_on_invalid_record: true
26
+ columns:
27
+ - {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
28
+ - {name: host, type: string}
29
+ - {name: path, type: string}
30
+ - {name: method, type: string}
31
+ - {name: referer, type: string}
32
+ - {name: code, type: long}
33
+ - {name: agent, type: string}
34
+ - {name: user, type: string}
35
+ - {name: size, type: long}
36
+ - {name: d, type: double}
37
+ - {name: flag, type: boolean}
38
+ out:
39
+ type: stdout
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
+ <configuration>
4
+ <property>
5
+ <name>dfs.replication</name>
6
+ <value>1</value>
7
+ </property>
8
+
9
+ <property>
10
+ <name>dfs.name.dir</name>
11
+ <value>file:///tmp/hdfs/namenode</value>
12
+ </property>
13
+
14
+ <property>
15
+ <name>dfs.data.dir</name>
16
+ <value>file:///tmp/hdfs/datanode</value>
17
+ </property>
18
+ </configuration>
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0"?>
2
+ <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
+ <configuration>
4
+ <property>
5
+ <name>mapreduce.framework.name</name>
6
+ <value>local</value>
7
+ </property>
8
+ </configuration>
@@ -0,0 +1,3 @@
1
+ timestamp,host,path,method,referer,code,agent,user,size,d,flag
2
+ 2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,200,Mozilla/5.0,-,136,1.1,true
3
+ 2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70,1.2,false
@@ -0,0 +1,4 @@
1
+ timestamp,host,path,method,referer,code,agent,user,size,d,flag
2
+ 2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,200,Mozilla/5.0,-,136,1.1,true
3
+ 2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70,1.2,false
4
+
@@ -0,0 +1,4 @@
1
+ embulk-executor-mapreduce/src/test/resources/fixtures/invalid_csv/sample2.csv timestamp,host,path,method,referer,code,agent,user,size,d,flag
2
+ 2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,mapred,Mozilla/5.0,-,136,1.1,true
3
+ 2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70,1.2,false
4
+
@@ -0,0 +1,3 @@
1
+ timestamp,host,path,method,referer,code,agent,user,size,d,flag
2
+ 2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,200,Mozilla/5.0,-,136,1.1,true
3
+ 2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70,1.2,false
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-executor-mapreduce
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-28 00:00:00.000000000 Z
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Executes tasks on Hadoop.
14
14
  email:
@@ -39,6 +39,28 @@ files:
39
39
  - src/main/java/org/embulk/executor/mapreduce/RemoteTaskFailedException.java
40
40
  - src/main/java/org/embulk/executor/mapreduce/SetContextClassLoader.java
41
41
  - src/main/java/org/embulk/executor/mapreduce/TimestampPartitioning.java
42
+ - src/test/java/org/embulk/executor/mapreduce/MapReduceExecutorTestRuntime.java
43
+ - src/test/java/org/embulk/executor/mapreduce/TestAttemptState.java
44
+ - src/test/java/org/embulk/executor/mapreduce/TestEmbulkInputFormat.java
45
+ - src/test/java/org/embulk/executor/mapreduce/TestEmbulkInputSplit.java
46
+ - src/test/java/org/embulk/executor/mapreduce/TestEmbulkRecordReader.java
47
+ - src/test/java/org/embulk/executor/mapreduce/TestMapReduceExecutor.java
48
+ - src/test/java/org/embulk/executor/mapreduce/TestPageBufferWritable.java
49
+ - src/test/java/org/embulk/executor/mapreduce/TestTimestampPartitioning.java
50
+ - src/test/resources/config/core-site.xml
51
+ - src/test/resources/config/embulk_mapred_config.yml
52
+ - src/test/resources/config/embulk_mapred_invalid_config_files_config.yml
53
+ - src/test/resources/config/embulk_mapred_invalid_libjars_config.yml
54
+ - src/test/resources/config/embulk_mapred_invalid_partitioning_config.yml
55
+ - src/test/resources/config/embulk_mapred_invalid_reducers_config.yml
56
+ - src/test/resources/config/embulk_mapred_partitioning_config.yml
57
+ - src/test/resources/config/embulk_mapred_stop_on_invalid_record_config.yml
58
+ - src/test/resources/config/hdfs-site.xml
59
+ - src/test/resources/config/mapred-site.xml
60
+ - src/test/resources/fixtures/csv/sample1.csv
61
+ - src/test/resources/fixtures/csv/sample2.csv
62
+ - src/test/resources/fixtures/invalid_csv/sample1.csv
63
+ - src/test/resources/fixtures/invalid_csv/sample2.csv
42
64
  - classpath/activation-1.1.jar
43
65
  - classpath/apacheds-i18n-2.0.0-M15.jar
44
66
  - classpath/apacheds-kerberos-codec-2.0.0-M15.jar
@@ -62,7 +84,7 @@ files:
62
84
  - classpath/curator-client-2.6.0.jar
63
85
  - classpath/curator-framework-2.6.0.jar
64
86
  - classpath/curator-recipes-2.6.0.jar
65
- - classpath/embulk-executor-mapreduce-0.2.2.jar
87
+ - classpath/embulk-executor-mapreduce-0.2.3.jar
66
88
  - classpath/gson-2.2.4.jar
67
89
  - classpath/hadoop-annotations-2.6.0.jar
68
90
  - classpath/hadoop-auth-2.6.0.jar