embulk-executor-mapreduce 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/classpath/{embulk-executor-mapreduce-0.2.2.jar → embulk-executor-mapreduce-0.2.3.jar} +0 -0
- data/src/main/java/org/embulk/executor/mapreduce/MapReduceExecutor.java +48 -24
- data/src/main/java/org/embulk/executor/mapreduce/TimestampPartitioning.java +11 -6
- data/src/test/java/org/embulk/executor/mapreduce/MapReduceExecutorTestRuntime.java +130 -0
- data/src/test/java/org/embulk/executor/mapreduce/TestAttemptState.java +58 -0
- data/src/test/java/org/embulk/executor/mapreduce/TestEmbulkInputFormat.java +54 -0
- data/src/test/java/org/embulk/executor/mapreduce/TestEmbulkInputSplit.java +46 -0
- data/src/test/java/org/embulk/executor/mapreduce/TestEmbulkRecordReader.java +25 -0
- data/src/test/java/org/embulk/executor/mapreduce/TestMapReduceExecutor.java +251 -0
- data/src/test/java/org/embulk/executor/mapreduce/TestPageBufferWritable.java +84 -0
- data/src/test/java/org/embulk/executor/mapreduce/TestTimestampPartitioning.java +222 -0
- data/src/test/resources/config/core-site.xml +8 -0
- data/src/test/resources/config/embulk_mapred_config.yml +38 -0
- data/src/test/resources/config/embulk_mapred_invalid_config_files_config.yml +38 -0
- data/src/test/resources/config/embulk_mapred_invalid_libjars_config.yml +40 -0
- data/src/test/resources/config/embulk_mapred_invalid_partitioning_config.yml +40 -0
- data/src/test/resources/config/embulk_mapred_invalid_reducers_config.yml +44 -0
- data/src/test/resources/config/embulk_mapred_partitioning_config.yml +43 -0
- data/src/test/resources/config/embulk_mapred_stop_on_invalid_record_config.yml +39 -0
- data/src/test/resources/config/hdfs-site.xml +18 -0
- data/src/test/resources/config/mapred-site.xml +8 -0
- data/src/test/resources/fixtures/csv/sample1.csv +3 -0
- data/src/test/resources/fixtures/csv/sample2.csv +4 -0
- data/src/test/resources/fixtures/invalid_csv/sample1.csv +4 -0
- data/src/test/resources/fixtures/invalid_csv/sample2.csv +3 -0
- metadata +25 -3
@@ -0,0 +1,38 @@
|
|
1
|
+
exec:
|
2
|
+
type: mapreduce
|
3
|
+
config_files:
|
4
|
+
- src/test/resources/config/core-site.xml
|
5
|
+
- src/test/resources/config/hdfs-site.xml
|
6
|
+
- src/test/resources/config/mapred-site.xml
|
7
|
+
config:
|
8
|
+
k1: v1
|
9
|
+
k2: v2
|
10
|
+
state_path: 'file:///tmp/embulk/'
|
11
|
+
job_name: embulk_mapred_0001
|
12
|
+
exclude_jars:
|
13
|
+
- '*log4j-over-slf4j*'
|
14
|
+
in:
|
15
|
+
type: file
|
16
|
+
path_prefix: src/test/resources/fixtures/csv/sample
|
17
|
+
parser:
|
18
|
+
charset: UTF-8
|
19
|
+
newline: CRLF
|
20
|
+
type: csv
|
21
|
+
delimiter: ','
|
22
|
+
quote: ''
|
23
|
+
escape: ''
|
24
|
+
skip_header_lines: 1
|
25
|
+
columns:
|
26
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
|
27
|
+
- {name: host, type: string}
|
28
|
+
- {name: path, type: string}
|
29
|
+
- {name: method, type: string}
|
30
|
+
- {name: referer, type: string}
|
31
|
+
- {name: code, type: long}
|
32
|
+
- {name: agent, type: string}
|
33
|
+
- {name: user, type: string}
|
34
|
+
- {name: size, type: long}
|
35
|
+
- {name: d, type: double}
|
36
|
+
- {name: flag, type: boolean}
|
37
|
+
out:
|
38
|
+
type: stdout
|
@@ -0,0 +1,38 @@
|
|
1
|
+
exec:
|
2
|
+
type: mapreduce
|
3
|
+
config_files:
|
4
|
+
- src/test/resources/config/invalid-core-site.xml
|
5
|
+
- src/test/resources/config/invalid-hdfs-site.xml
|
6
|
+
- src/test/resources/config/invalid-mapred-site.xml
|
7
|
+
config:
|
8
|
+
k1: v1
|
9
|
+
k2: v2
|
10
|
+
state_path: 'file:///tmp/embulk/'
|
11
|
+
job_name: embulk_mapred_0001
|
12
|
+
exclude_jars:
|
13
|
+
- '*log4j-over-slf4j*'
|
14
|
+
in:
|
15
|
+
type: file
|
16
|
+
path_prefix: src/test/resources/fixtures/csv/sample
|
17
|
+
parser:
|
18
|
+
charset: UTF-8
|
19
|
+
newline: CRLF
|
20
|
+
type: csv
|
21
|
+
delimiter: ','
|
22
|
+
quote: ''
|
23
|
+
escape: ''
|
24
|
+
skip_header_lines: 1
|
25
|
+
columns:
|
26
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
|
27
|
+
- {name: host, type: string}
|
28
|
+
- {name: path, type: string}
|
29
|
+
- {name: method, type: string}
|
30
|
+
- {name: referer, type: string}
|
31
|
+
- {name: code, type: long}
|
32
|
+
- {name: agent, type: string}
|
33
|
+
- {name: user, type: string}
|
34
|
+
- {name: size, type: long}
|
35
|
+
- {name: d, type: double}
|
36
|
+
- {name: flag, type: boolean}
|
37
|
+
out:
|
38
|
+
type: stdout
|
@@ -0,0 +1,40 @@
|
|
1
|
+
exec:
|
2
|
+
type: mapreduce
|
3
|
+
config_files:
|
4
|
+
- src/test/resources/config/core-site.xml
|
5
|
+
- src/test/resources/config/hdfs-site.xml
|
6
|
+
- src/test/resources/config/mapred-site.xml
|
7
|
+
config:
|
8
|
+
k1: v1
|
9
|
+
k2: v2
|
10
|
+
state_path: 'file:///tmp/embulk/'
|
11
|
+
job_name: embulk_mapred_0001
|
12
|
+
libjars:
|
13
|
+
- invalid_jar
|
14
|
+
exclude_jars:
|
15
|
+
- '*log4j-over-slf4j*'
|
16
|
+
in:
|
17
|
+
type: file
|
18
|
+
path_prefix: src/test/resources/fixtures/csv/sample
|
19
|
+
parser:
|
20
|
+
charset: UTF-8
|
21
|
+
newline: CRLF
|
22
|
+
type: csv
|
23
|
+
delimiter: ','
|
24
|
+
quote: ''
|
25
|
+
escape: ''
|
26
|
+
skip_header_lines: 1
|
27
|
+
columns:
|
28
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
|
29
|
+
- {name: host, type: string}
|
30
|
+
- {name: path, type: string}
|
31
|
+
- {name: method, type: string}
|
32
|
+
- {name: referer, type: string}
|
33
|
+
- {name: code, type: long}
|
34
|
+
- {name: agent, type: string}
|
35
|
+
- {name: user, type: string}
|
36
|
+
- {name: size, type: long}
|
37
|
+
- {name: d, type: double}
|
38
|
+
- {name: flag, type: boolean}
|
39
|
+
out:
|
40
|
+
type: stdout
|
@@ -0,0 +1,40 @@
|
|
1
|
+
exec:
|
2
|
+
type: mapreduce
|
3
|
+
config_files:
|
4
|
+
- src/test/resources/config/core-site.xml
|
5
|
+
- src/test/resources/config/hdfs-site.xml
|
6
|
+
- src/test/resources/config/mapred-site.xml
|
7
|
+
config:
|
8
|
+
k1: v1
|
9
|
+
k2: v2
|
10
|
+
partitioning:
|
11
|
+
type: long
|
12
|
+
state_path: 'file:///tmp/embulk/'
|
13
|
+
job_name: embulk_mapred_partitioning_0001
|
14
|
+
exclude_jars:
|
15
|
+
- '*log4j-over-slf4j*'
|
16
|
+
in:
|
17
|
+
type: file
|
18
|
+
path_prefix: src/test/resources/fixtures/csv/sample
|
19
|
+
parser:
|
20
|
+
charset: UTF-8
|
21
|
+
newline: CRLF
|
22
|
+
type: csv
|
23
|
+
delimiter: ','
|
24
|
+
quote: ''
|
25
|
+
escape: ''
|
26
|
+
skip_header_lines: 1
|
27
|
+
columns:
|
28
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
|
29
|
+
- {name: host, type: string}
|
30
|
+
- {name: path, type: string}
|
31
|
+
- {name: method, type: string}
|
32
|
+
- {name: referer, type: string}
|
33
|
+
- {name: code, type: long}
|
34
|
+
- {name: agent, type: string}
|
35
|
+
- {name: user, type: string}
|
36
|
+
- {name: size, type: long}
|
37
|
+
- {name: d, type: double}
|
38
|
+
- {name: flag, type: boolean}
|
39
|
+
out:
|
40
|
+
type: stdout
|
@@ -0,0 +1,44 @@
|
|
1
|
+
exec:
|
2
|
+
type: mapreduce
|
3
|
+
config_files:
|
4
|
+
- src/test/resources/config/core-site.xml
|
5
|
+
- src/test/resources/config/hdfs-site.xml
|
6
|
+
- src/test/resources/config/mapred-site.xml
|
7
|
+
config:
|
8
|
+
k1: v1
|
9
|
+
k2: v2
|
10
|
+
partitioning:
|
11
|
+
type: timestamp
|
12
|
+
unit: hour
|
13
|
+
column: timestamp
|
14
|
+
unix_timestamp_unit: sec
|
15
|
+
reducers: -1
|
16
|
+
state_path: 'file:///tmp/embulk/'
|
17
|
+
job_name: embulk_mapred_0001
|
18
|
+
exclude_jars:
|
19
|
+
- '*log4j-over-slf4j*'
|
20
|
+
in:
|
21
|
+
type: file
|
22
|
+
path_prefix: src/test/resources/fixtures/csv/sample
|
23
|
+
parser:
|
24
|
+
charset: UTF-8
|
25
|
+
newline: CRLF
|
26
|
+
type: csv
|
27
|
+
delimiter: ','
|
28
|
+
quote: ''
|
29
|
+
escape: ''
|
30
|
+
skip_header_lines: 1
|
31
|
+
columns:
|
32
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
|
33
|
+
- {name: host, type: string}
|
34
|
+
- {name: path, type: string}
|
35
|
+
- {name: method, type: string}
|
36
|
+
- {name: referer, type: string}
|
37
|
+
- {name: code, type: long}
|
38
|
+
- {name: agent, type: string}
|
39
|
+
- {name: user, type: string}
|
40
|
+
- {name: size, type: long}
|
41
|
+
- {name: d, type: double}
|
42
|
+
- {name: flag, type: boolean}
|
43
|
+
out:
|
44
|
+
type: stdout
|
@@ -0,0 +1,43 @@
|
|
1
|
+
exec:
|
2
|
+
type: mapreduce
|
3
|
+
config_files:
|
4
|
+
- src/test/resources/config/core-site.xml
|
5
|
+
- src/test/resources/config/hdfs-site.xml
|
6
|
+
- src/test/resources/config/mapred-site.xml
|
7
|
+
config:
|
8
|
+
k1: v1
|
9
|
+
k2: v2
|
10
|
+
partitioning:
|
11
|
+
type: timestamp
|
12
|
+
unit: hour
|
13
|
+
column: timestamp
|
14
|
+
unix_timestamp_unit: sec
|
15
|
+
state_path: 'file:///tmp/embulk/'
|
16
|
+
job_name: embulk_mapred_partitioning_0001
|
17
|
+
exclude_jars:
|
18
|
+
- '*log4j-over-slf4j*'
|
19
|
+
in:
|
20
|
+
type: file
|
21
|
+
path_prefix: src/test/resources/fixtures/csv/sample
|
22
|
+
parser:
|
23
|
+
charset: UTF-8
|
24
|
+
newline: CRLF
|
25
|
+
type: csv
|
26
|
+
delimiter: ','
|
27
|
+
quote: ''
|
28
|
+
escape: ''
|
29
|
+
skip_header_lines: 1
|
30
|
+
columns:
|
31
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
|
32
|
+
- {name: host, type: string}
|
33
|
+
- {name: path, type: string}
|
34
|
+
- {name: method, type: string}
|
35
|
+
- {name: referer, type: string}
|
36
|
+
- {name: code, type: long}
|
37
|
+
- {name: agent, type: string}
|
38
|
+
- {name: user, type: string}
|
39
|
+
- {name: size, type: long}
|
40
|
+
- {name: d, type: double}
|
41
|
+
- {name: flag, type: boolean}
|
42
|
+
out:
|
43
|
+
type: stdout
|
@@ -0,0 +1,39 @@
|
|
1
|
+
exec:
|
2
|
+
type: mapreduce
|
3
|
+
config_files:
|
4
|
+
- src/test/resources/config/core-site.xml
|
5
|
+
- src/test/resources/config/hdfs-site.xml
|
6
|
+
- src/test/resources/config/mapred-site.xml
|
7
|
+
config:
|
8
|
+
k1: v1
|
9
|
+
k2: v2
|
10
|
+
state_path: 'file:///tmp/embulk/'
|
11
|
+
job_name: embulk_mapred_0001
|
12
|
+
exclude_jars:
|
13
|
+
- '*log4j-over-slf4j*'
|
14
|
+
in:
|
15
|
+
type: file
|
16
|
+
path_prefix: src/test/resources/fixtures/invalid_csv/sample
|
17
|
+
parser:
|
18
|
+
charset: UTF-8
|
19
|
+
newline: CRLF
|
20
|
+
type: csv
|
21
|
+
delimiter: ','
|
22
|
+
quote: ''
|
23
|
+
escape: ''
|
24
|
+
skip_header_lines: 1
|
25
|
+
stop_on_invalid_record: true
|
26
|
+
columns:
|
27
|
+
- {name: timestamp, type: timestamp, format: "%Y-%m-%d %H:%M:%S"}
|
28
|
+
- {name: host, type: string}
|
29
|
+
- {name: path, type: string}
|
30
|
+
- {name: method, type: string}
|
31
|
+
- {name: referer, type: string}
|
32
|
+
- {name: code, type: long}
|
33
|
+
- {name: agent, type: string}
|
34
|
+
- {name: user, type: string}
|
35
|
+
- {name: size, type: long}
|
36
|
+
- {name: d, type: double}
|
37
|
+
- {name: flag, type: boolean}
|
38
|
+
out:
|
39
|
+
type: stdout
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
3
|
+
<configuration>
|
4
|
+
<property>
|
5
|
+
<name>dfs.replication</name>
|
6
|
+
<value>1</value>
|
7
|
+
</property>
|
8
|
+
|
9
|
+
<property>
|
10
|
+
<name>dfs.name.dir</name>
|
11
|
+
<value>file:///tmp/hdfs/namenode</value>
|
12
|
+
</property>
|
13
|
+
|
14
|
+
<property>
|
15
|
+
<name>dfs.data.dir</name>
|
16
|
+
<value>file:///tmp/hdfs/datanode</value>
|
17
|
+
</property>
|
18
|
+
</configuration>
|
@@ -0,0 +1,4 @@
|
|
1
|
+
embulk-executor-mapreduce/src/test/resources/fixtures/invalid_csv/sample2.csv timestamp,host,path,method,referer,code,agent,user,size,d,flag
|
2
|
+
2014-10-02 22:15:39,84.186.29.187,/category/electronics,GET,/category/music,mapred,Mozilla/5.0,-,136,1.1,true
|
3
|
+
2014-10-02 22:15:01,140.36.216.47,/category/music?from=10,GET,-,200,Mozilla/5.0,-,70,1.2,false
|
4
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-executor-mapreduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sadayuki Furuhashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Executes tasks on Hadoop.
|
14
14
|
email:
|
@@ -39,6 +39,28 @@ files:
|
|
39
39
|
- src/main/java/org/embulk/executor/mapreduce/RemoteTaskFailedException.java
|
40
40
|
- src/main/java/org/embulk/executor/mapreduce/SetContextClassLoader.java
|
41
41
|
- src/main/java/org/embulk/executor/mapreduce/TimestampPartitioning.java
|
42
|
+
- src/test/java/org/embulk/executor/mapreduce/MapReduceExecutorTestRuntime.java
|
43
|
+
- src/test/java/org/embulk/executor/mapreduce/TestAttemptState.java
|
44
|
+
- src/test/java/org/embulk/executor/mapreduce/TestEmbulkInputFormat.java
|
45
|
+
- src/test/java/org/embulk/executor/mapreduce/TestEmbulkInputSplit.java
|
46
|
+
- src/test/java/org/embulk/executor/mapreduce/TestEmbulkRecordReader.java
|
47
|
+
- src/test/java/org/embulk/executor/mapreduce/TestMapReduceExecutor.java
|
48
|
+
- src/test/java/org/embulk/executor/mapreduce/TestPageBufferWritable.java
|
49
|
+
- src/test/java/org/embulk/executor/mapreduce/TestTimestampPartitioning.java
|
50
|
+
- src/test/resources/config/core-site.xml
|
51
|
+
- src/test/resources/config/embulk_mapred_config.yml
|
52
|
+
- src/test/resources/config/embulk_mapred_invalid_config_files_config.yml
|
53
|
+
- src/test/resources/config/embulk_mapred_invalid_libjars_config.yml
|
54
|
+
- src/test/resources/config/embulk_mapred_invalid_partitioning_config.yml
|
55
|
+
- src/test/resources/config/embulk_mapred_invalid_reducers_config.yml
|
56
|
+
- src/test/resources/config/embulk_mapred_partitioning_config.yml
|
57
|
+
- src/test/resources/config/embulk_mapred_stop_on_invalid_record_config.yml
|
58
|
+
- src/test/resources/config/hdfs-site.xml
|
59
|
+
- src/test/resources/config/mapred-site.xml
|
60
|
+
- src/test/resources/fixtures/csv/sample1.csv
|
61
|
+
- src/test/resources/fixtures/csv/sample2.csv
|
62
|
+
- src/test/resources/fixtures/invalid_csv/sample1.csv
|
63
|
+
- src/test/resources/fixtures/invalid_csv/sample2.csv
|
42
64
|
- classpath/activation-1.1.jar
|
43
65
|
- classpath/apacheds-i18n-2.0.0-M15.jar
|
44
66
|
- classpath/apacheds-kerberos-codec-2.0.0-M15.jar
|
@@ -62,7 +84,7 @@ files:
|
|
62
84
|
- classpath/curator-client-2.6.0.jar
|
63
85
|
- classpath/curator-framework-2.6.0.jar
|
64
86
|
- classpath/curator-recipes-2.6.0.jar
|
65
|
-
- classpath/embulk-executor-mapreduce-0.2.2.jar
|
87
|
+
- classpath/embulk-executor-mapreduce-0.2.3.jar
|
66
88
|
- classpath/gson-2.2.4.jar
|
67
89
|
- classpath/hadoop-annotations-2.6.0.jar
|
68
90
|
- classpath/hadoop-auth-2.6.0.jar
|