embulk-parser-apache-custom-log 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/build.gradle +1 -1
- data/circle.yml +3 -0
- data/src/main/java/org/embulk/parser/apache/log/SimpleDateFormatTimestampLogElement.java +42 -0
- data/src/main/java/org/embulk/parser/apache/log/TimestampLogElementFactory.java +6 -3
- data/src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java +36 -1
- data/src/test/java/org/embulk/tester/DummyConfigSource.java +5 -0
- data/src/test/java/org/embulk/tester/EmbulkPluginTester.java +10 -3
- data/src/test/resources/data/access_log_custom_time_format +1 -0
- data/src/test/resources/yml/test_custom_time_format.yml +13 -0
- metadata +7 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0e9175fc76d713c5e0cb3d94e76abfd1d5907381
|
|
4
|
+
data.tar.gz: b0b200ead1ca5153054ac23656bdef3257ef9ba9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9d31ad44accc1e444a1b27d5d0cb26e62c09d859ae1d25ab0fbcbabf1187810967ee22bc09347f1f0058e5172f4d942d46bf036bf4afdc726e920d30bb26f2a9
|
|
7
|
+
data.tar.gz: a8c7663f5b7084dfcc721b9b09ddb25e7a8f6365fe0db77536182dd9c914fe0028dc2efa27b9c36ed07d76aac6a7ec4430b1526807bfde2fc7c561225e24635f
|
data/README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Apache **CustomLog** parser plugin for Embulk
|
|
2
2
|
|
|
3
|
+

|
|
4
|
+
|
|
3
5
|
Embulk parser plugin for apache **CustomLog**.
|
|
4
6
|
|
|
5
7
|
Parser configuration based [Apache HTTPD 2.2 CustomLogFormat](http://httpd.apache.org/docs/2.2/en/mod/mod_log_config.html#formats)
|
|
@@ -64,7 +66,7 @@ see: [LogFormats.java](https://github.com/jami-i/embulk-parser-apache-custom-log
|
|
|
64
66
|
| q | String | request-query | |
|
|
65
67
|
| r | String | request-line | |
|
|
66
68
|
| s | Long | response-status | |
|
|
67
|
-
| t | Timestamp | request-time | timestamp format defined in [strptime](http://docs.ruby-lang.org/en/2.0.0/DateTime.html#method-c-_strptime)|
|
|
69
|
+
| t | Timestamp | request-time | timestamp format defined in [strptime](http://docs.ruby-lang.org/en/2.0.0/DateTime.html#method-c-_strptime). <br/>if format is not present, DateTimeFormatter class will be used. |
|
|
68
70
|
| T | Long | request-process-time-s | |
|
|
69
71
|
| u | String | request-user | |
|
|
70
72
|
| U | String | request-path | |
|
data/build.gradle
CHANGED
data/circle.yml
ADDED
data/src/main/java/org/embulk/parser/apache/log/SimpleDateFormatTimestampLogElement.java
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
package org.embulk.parser.apache.log;
|
|
2
|
+
|
|
3
|
+
import org.embulk.spi.PageBuilder;
|
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
|
5
|
+
import org.embulk.spi.time.TimestampParser;
|
|
6
|
+
import org.joda.time.format.DateTimeFormat;
|
|
7
|
+
import org.joda.time.format.DateTimeFormatter;
|
|
8
|
+
|
|
9
|
+
import java.util.Locale;
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
public class SimpleDateFormatTimestampLogElement extends TimestampLogElement {
|
|
13
|
+
|
|
14
|
+
static final DateTimeFormatter formatter =
|
|
15
|
+
DateTimeFormat
|
|
16
|
+
.forPattern("dd/MMM/yyyy:HH:mm:ss Z")
|
|
17
|
+
.withLocale(Locale.US);
|
|
18
|
+
|
|
19
|
+
public SimpleDateFormatTimestampLogElement(TimestampParser.Task task, String name) {
|
|
20
|
+
super(task, name, "\\[([^\\]]+)\\]", "");
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
@Override
|
|
24
|
+
public Timestamp parse(String s) {
|
|
25
|
+
try{
|
|
26
|
+
long epoch = formatter.parseDateTime(s).getMillis();
|
|
27
|
+
return Timestamp.ofEpochMilli(epoch);
|
|
28
|
+
}catch (Exception e){
|
|
29
|
+
return null;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
@Override
|
|
34
|
+
public void setToPageBuilder(PageBuilder pageBuilder, int i, String value) {
|
|
35
|
+
Timestamp parse = parse(value);
|
|
36
|
+
if(parse != null){
|
|
37
|
+
pageBuilder.setTimestamp(i, parse);
|
|
38
|
+
}else{
|
|
39
|
+
pageBuilder.setNull(i);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
data/src/main/java/org/embulk/parser/apache/log/TimestampLogElementFactory.java
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
package org.embulk.parser.apache.log;
|
|
2
2
|
|
|
3
|
-
import java.util.regex.Pattern;
|
|
4
|
-
import java.util.regex.Matcher;
|
|
5
3
|
|
|
6
4
|
import org.apache.commons.lang3.StringUtils;
|
|
7
5
|
import org.embulk.spi.time.TimestampParser;
|
|
6
|
+
import org.slf4j.Logger;
|
|
7
|
+
import org.slf4j.LoggerFactory;
|
|
8
8
|
|
|
9
9
|
public class TimestampLogElementFactory implements LogElementFactory<TimestampLogElement>, Patterns {
|
|
10
10
|
|
|
11
|
+
private static final Logger logger = LoggerFactory.getLogger(TimestampLogElementFactory.class);
|
|
12
|
+
|
|
11
13
|
private TimestampParser.Task task;
|
|
12
14
|
private String name;
|
|
13
15
|
|
|
@@ -19,7 +21,8 @@ public class TimestampLogElementFactory implements LogElementFactory<TimestampLo
|
|
|
19
21
|
@Override
|
|
20
22
|
public TimestampLogElement create(String parameter) {
|
|
21
23
|
if(StringUtils.isEmpty(parameter)){
|
|
22
|
-
|
|
24
|
+
logger.info("since format parameter is not given, use DateTimeFormatter.");
|
|
25
|
+
return new SimpleDateFormatTimestampLogElement(task, name);
|
|
23
26
|
}else{
|
|
24
27
|
String regex = toTimestampRegex(parameter);
|
|
25
28
|
return new TimestampLogElement(task, name, regex, parameter);
|
data/src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java
CHANGED
|
@@ -9,6 +9,7 @@ import java.io.File;
|
|
|
9
9
|
import java.io.FileReader;
|
|
10
10
|
import java.io.IOException;
|
|
11
11
|
import java.net.URISyntaxException;
|
|
12
|
+
import java.util.Arrays;
|
|
12
13
|
import java.util.function.Consumer;
|
|
13
14
|
|
|
14
15
|
import static org.hamcrest.CoreMatchers.is;
|
|
@@ -16,7 +17,7 @@ import static org.junit.Assert.assertThat;
|
|
|
16
17
|
|
|
17
18
|
public class TestApacheLogParserPlugin {
|
|
18
19
|
|
|
19
|
-
private static EmbulkPluginTester tester = new EmbulkPluginTester(ParserPlugin.class, "apache-log",
|
|
20
|
+
private static EmbulkPluginTester tester = new EmbulkPluginTester(ParserPlugin.class, "apache-log", ApacheCustomLogParserPlugin.class);
|
|
20
21
|
|
|
21
22
|
@Test
|
|
22
23
|
public void test_common() throws Exception {
|
|
@@ -52,6 +53,40 @@ public class TestApacheLogParserPlugin {
|
|
|
52
53
|
|
|
53
54
|
}
|
|
54
55
|
|
|
56
|
+
@Test
|
|
57
|
+
public void test_custom_time_format() throws Exception {
|
|
58
|
+
tester.run("/yml/test_custom_time_format.yml");
|
|
59
|
+
|
|
60
|
+
assertResult(
|
|
61
|
+
"/temp/result_custom_time_format.000.00.tsv",
|
|
62
|
+
cols -> {
|
|
63
|
+
String[] expected = new String[]{
|
|
64
|
+
"remote-host",
|
|
65
|
+
"remote-log-name",
|
|
66
|
+
"request-user",
|
|
67
|
+
"request-time",
|
|
68
|
+
"request-line",
|
|
69
|
+
"response-status",
|
|
70
|
+
"response-bytes"
|
|
71
|
+
};
|
|
72
|
+
assertThat(cols, is(expected));
|
|
73
|
+
},
|
|
74
|
+
cols -> {
|
|
75
|
+
String[] expected = new String[]{
|
|
76
|
+
"127.0.0.1",
|
|
77
|
+
"",
|
|
78
|
+
"frank",
|
|
79
|
+
"2015-11-20 13:55:36.000000 +0000",
|
|
80
|
+
"GET /apache_pb.gif HTTP/1.0",
|
|
81
|
+
"200",
|
|
82
|
+
"2326"
|
|
83
|
+
};
|
|
84
|
+
assertThat(cols, is(expected));
|
|
85
|
+
}
|
|
86
|
+
);
|
|
87
|
+
|
|
88
|
+
}
|
|
89
|
+
|
|
55
90
|
@Test
|
|
56
91
|
public void test_combined() throws Exception {
|
|
57
92
|
tester.run("/yml/test_combined.yml");
|
data/src/test/java/org/embulk/tester/EmbulkPluginTester.java
CHANGED
|
@@ -8,7 +8,8 @@ import java.io.FileWriter;
|
|
|
8
8
|
import java.util.regex.Matcher;
|
|
9
9
|
import java.util.regex.Pattern;
|
|
10
10
|
|
|
11
|
-
import org.embulk.
|
|
11
|
+
import org.embulk.EmbulkEmbed;
|
|
12
|
+
import org.embulk.config.ConfigLoader;
|
|
12
13
|
|
|
13
14
|
public class EmbulkPluginTester {
|
|
14
15
|
|
|
@@ -19,8 +20,14 @@ public class EmbulkPluginTester {
|
|
|
19
20
|
|
|
20
21
|
public void run(String ymlPath) throws Exception
|
|
21
22
|
{
|
|
22
|
-
|
|
23
|
-
|
|
23
|
+
EmbulkEmbed.Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
|
|
24
|
+
|
|
25
|
+
EmbulkEmbed embulk = bootstrap.initialize();
|
|
26
|
+
|
|
27
|
+
ConfigLoader configLoader = new ConfigLoader(embulk.getModelManager());
|
|
28
|
+
embulk.run(configLoader.fromYamlFile(new File(convert(ymlPath))));
|
|
29
|
+
|
|
30
|
+
|
|
24
31
|
}
|
|
25
32
|
|
|
26
33
|
private String convert(String yml) throws Exception
|
data/src/test/resources/data/access_log_custom_time_format
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
127.0.0.1 - frank [2015.11.20 13:55:36] "GET /apache_pb.gif HTTP/1.0" 200 2326
|
data/src/test/resources/yml/test_custom_time_format.yml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
in:
|
|
2
|
+
type: file
|
|
3
|
+
path_prefix: 'data/access_log_custom_time_format'
|
|
4
|
+
parser:
|
|
5
|
+
type: apache-log
|
|
6
|
+
format: '%h %l %u %{[%Y.%m.%d %T]}t \"%r\" %>s %b'
|
|
7
|
+
out:
|
|
8
|
+
type: file
|
|
9
|
+
path_prefix: '/temp/result_custom_time_format.'
|
|
10
|
+
file_ext: tsv
|
|
11
|
+
formatter:
|
|
12
|
+
type: csv
|
|
13
|
+
delimiter: "\t"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: embulk-parser-apache-custom-log
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Hiroyuki Sato
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2015-
|
|
12
|
+
date: 2015-11-20 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -52,6 +52,7 @@ files:
|
|
|
52
52
|
- LICENSE.txt
|
|
53
53
|
- README.md
|
|
54
54
|
- build.gradle
|
|
55
|
+
- circle.yml
|
|
55
56
|
- gradle/wrapper/gradle-wrapper.jar
|
|
56
57
|
- gradle/wrapper/gradle-wrapper.properties
|
|
57
58
|
- gradlew
|
|
@@ -66,6 +67,7 @@ files:
|
|
|
66
67
|
- src/main/java/org/embulk/parser/apache/log/LongLogElementFactory.java
|
|
67
68
|
- src/main/java/org/embulk/parser/apache/log/Patterns.java
|
|
68
69
|
- src/main/java/org/embulk/parser/apache/log/Replacement.java
|
|
70
|
+
- src/main/java/org/embulk/parser/apache/log/SimpleDateFormatTimestampLogElement.java
|
|
69
71
|
- src/main/java/org/embulk/parser/apache/log/StringLogElement.java
|
|
70
72
|
- src/main/java/org/embulk/parser/apache/log/StringLogElementFactory.java
|
|
71
73
|
- src/main/java/org/embulk/parser/apache/log/TimestampLogElement.java
|
|
@@ -82,12 +84,14 @@ files:
|
|
|
82
84
|
- src/test/resources/data/access_log_2_combined
|
|
83
85
|
- src/test/resources/data/access_log_combined
|
|
84
86
|
- src/test/resources/data/access_log_common
|
|
87
|
+
- src/test/resources/data/access_log_custom_time_format
|
|
85
88
|
- src/test/resources/resource.txt
|
|
86
89
|
- src/test/resources/temp/dummy
|
|
87
90
|
- src/test/resources/yml/test_combined.yml
|
|
88
91
|
- src/test/resources/yml/test_combined2.yml
|
|
89
92
|
- src/test/resources/yml/test_common.yml
|
|
90
|
-
- classpath/embulk-parser-apache-custom-log-0.3.0.jar
|
|
93
|
+
- src/test/resources/yml/test_custom_time_format.yml
|
|
94
|
+
- classpath/embulk-parser-apache-custom-log-0.4.0.jar
|
|
91
95
|
homepage: https://github.com/jami-i/embulk-parser-apache-custom-log
|
|
92
96
|
licenses:
|
|
93
97
|
- MIT
|