embulk-parser-apache-custom-log 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/build.gradle +1 -1
- data/circle.yml +3 -0
- data/src/main/java/org/embulk/parser/apache/log/SimpleDateFormatTimestampLogElement.java +42 -0
- data/src/main/java/org/embulk/parser/apache/log/TimestampLogElementFactory.java +6 -3
- data/src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java +36 -1
- data/src/test/java/org/embulk/tester/DummyConfigSource.java +5 -0
- data/src/test/java/org/embulk/tester/EmbulkPluginTester.java +10 -3
- data/src/test/resources/data/access_log_custom_time_format +1 -0
- data/src/test/resources/yml/test_custom_time_format.yml +13 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e9175fc76d713c5e0cb3d94e76abfd1d5907381
|
4
|
+
data.tar.gz: b0b200ead1ca5153054ac23656bdef3257ef9ba9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d31ad44accc1e444a1b27d5d0cb26e62c09d859ae1d25ab0fbcbabf1187810967ee22bc09347f1f0058e5172f4d942d46bf036bf4afdc726e920d30bb26f2a9
|
7
|
+
data.tar.gz: a8c7663f5b7084dfcc721b9b09ddb25e7a8f6365fe0db77536182dd9c914fe0028dc2efa27b9c36ed07d76aac6a7ec4430b1526807bfde2fc7c561225e24635f
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Apache **CustomLog** parser plugin for Embulk
|
2
2
|
|
3
|
+
![circlci-badge](https://circleci.com/gh/jami-i/embulk-parser-apache-custom-log.svg?&style=shield&circle-token=7092d38cbe35149872178bbce455dfc9d5bc951b)
|
4
|
+
|
3
5
|
Embulk parser plugin for apache **CustomLog**.
|
4
6
|
|
5
7
|
Parser configuration based [Apache HTTPD 2.2 CustomLogFormat](http://httpd.apache.org/docs/2.2/en/mod/mod_log_config.html#formats)
|
@@ -64,7 +66,7 @@ see: [LogFormats.java](https://github.com/jami-i/embulk-parser-apache-custom-log
|
|
64
66
|
| q | String | request-query | |
|
65
67
|
| r | String | request-line | |
|
66
68
|
| s | Long | response-status | |
|
67
|
-
| t | Timestamp | request-time | timestamp format defined in [strptime](http://docs.ruby-lang.org/en/2.0.0/DateTime.html#method-c-_strptime)|
|
69
|
+
| t | Timestamp | request-time | timestamp format defined in [strptime](http://docs.ruby-lang.org/en/2.0.0/DateTime.html#method-c-_strptime). <br/>if format is not present, DateTimeFormatter class will be used. |
|
68
70
|
| T | Long | request-process-time-s | |
|
69
71
|
| u | String | request-user | |
|
70
72
|
| U | String | request-path | |
|
data/build.gradle
CHANGED
data/circle.yml
ADDED
data/src/main/java/org/embulk/parser/apache/log/SimpleDateFormatTimestampLogElement.java
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
package org.embulk.parser.apache.log;
|
2
|
+
|
3
|
+
import org.embulk.spi.PageBuilder;
|
4
|
+
import org.embulk.spi.time.Timestamp;
|
5
|
+
import org.embulk.spi.time.TimestampParser;
|
6
|
+
import org.joda.time.format.DateTimeFormat;
|
7
|
+
import org.joda.time.format.DateTimeFormatter;
|
8
|
+
|
9
|
+
import java.util.Locale;
|
10
|
+
|
11
|
+
|
12
|
+
public class SimpleDateFormatTimestampLogElement extends TimestampLogElement {
|
13
|
+
|
14
|
+
static final DateTimeFormatter formatter =
|
15
|
+
DateTimeFormat
|
16
|
+
.forPattern("dd/MMM/yyyy:HH:mm:ss Z")
|
17
|
+
.withLocale(Locale.US);
|
18
|
+
|
19
|
+
public SimpleDateFormatTimestampLogElement(TimestampParser.Task task, String name) {
|
20
|
+
super(task, name, "\\[([^\\]]+)\\]", "");
|
21
|
+
}
|
22
|
+
|
23
|
+
@Override
|
24
|
+
public Timestamp parse(String s) {
|
25
|
+
try{
|
26
|
+
long epoch = formatter.parseDateTime(s).getMillis();
|
27
|
+
return Timestamp.ofEpochMilli(epoch);
|
28
|
+
}catch (Exception e){
|
29
|
+
return null;
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
33
|
+
@Override
|
34
|
+
public void setToPageBuilder(PageBuilder pageBuilder, int i, String value) {
|
35
|
+
Timestamp parse = parse(value);
|
36
|
+
if(parse != null){
|
37
|
+
pageBuilder.setTimestamp(i, parse);
|
38
|
+
}else{
|
39
|
+
pageBuilder.setNull(i);
|
40
|
+
}
|
41
|
+
}
|
42
|
+
}
|
data/src/main/java/org/embulk/parser/apache/log/TimestampLogElementFactory.java
CHANGED
@@ -1,13 +1,15 @@
|
|
1
1
|
package org.embulk.parser.apache.log;
|
2
2
|
|
3
|
-
import java.util.regex.Pattern;
|
4
|
-
import java.util.regex.Matcher;
|
5
3
|
|
6
4
|
import org.apache.commons.lang3.StringUtils;
|
7
5
|
import org.embulk.spi.time.TimestampParser;
|
6
|
+
import org.slf4j.Logger;
|
7
|
+
import org.slf4j.LoggerFactory;
|
8
8
|
|
9
9
|
public class TimestampLogElementFactory implements LogElementFactory<TimestampLogElement>, Patterns {
|
10
10
|
|
11
|
+
private static final Logger logger = LoggerFactory.getLogger(TimestampLogElementFactory.class);
|
12
|
+
|
11
13
|
private TimestampParser.Task task;
|
12
14
|
private String name;
|
13
15
|
|
@@ -19,7 +21,8 @@ public class TimestampLogElementFactory implements LogElementFactory<TimestampLo
|
|
19
21
|
@Override
|
20
22
|
public TimestampLogElement create(String parameter) {
|
21
23
|
if(StringUtils.isEmpty(parameter)){
|
22
|
-
|
24
|
+
logger.info("since format parameter is not given, use DateTimeFormatter.");
|
25
|
+
return new SimpleDateFormatTimestampLogElement(task, name);
|
23
26
|
}else{
|
24
27
|
String regex = toTimestampRegex(parameter);
|
25
28
|
return new TimestampLogElement(task, name, regex, parameter);
|
data/src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java
CHANGED
@@ -9,6 +9,7 @@ import java.io.File;
|
|
9
9
|
import java.io.FileReader;
|
10
10
|
import java.io.IOException;
|
11
11
|
import java.net.URISyntaxException;
|
12
|
+
import java.util.Arrays;
|
12
13
|
import java.util.function.Consumer;
|
13
14
|
|
14
15
|
import static org.hamcrest.CoreMatchers.is;
|
@@ -16,7 +17,7 @@ import static org.junit.Assert.assertThat;
|
|
16
17
|
|
17
18
|
public class TestApacheLogParserPlugin {
|
18
19
|
|
19
|
-
private static EmbulkPluginTester tester = new EmbulkPluginTester(ParserPlugin.class, "apache-log",
|
20
|
+
private static EmbulkPluginTester tester = new EmbulkPluginTester(ParserPlugin.class, "apache-log", ApacheCustomLogParserPlugin.class);
|
20
21
|
|
21
22
|
@Test
|
22
23
|
public void test_common() throws Exception {
|
@@ -52,6 +53,40 @@ public class TestApacheLogParserPlugin {
|
|
52
53
|
|
53
54
|
}
|
54
55
|
|
56
|
+
@Test
|
57
|
+
public void test_custom_time_format() throws Exception {
|
58
|
+
tester.run("/yml/test_custom_time_format.yml");
|
59
|
+
|
60
|
+
assertResult(
|
61
|
+
"/temp/result_custom_time_format.000.00.tsv",
|
62
|
+
cols -> {
|
63
|
+
String[] expected = new String[]{
|
64
|
+
"remote-host",
|
65
|
+
"remote-log-name",
|
66
|
+
"request-user",
|
67
|
+
"request-time",
|
68
|
+
"request-line",
|
69
|
+
"response-status",
|
70
|
+
"response-bytes"
|
71
|
+
};
|
72
|
+
assertThat(cols, is(expected));
|
73
|
+
},
|
74
|
+
cols -> {
|
75
|
+
String[] expected = new String[]{
|
76
|
+
"127.0.0.1",
|
77
|
+
"",
|
78
|
+
"frank",
|
79
|
+
"2015-11-20 13:55:36.000000 +0000",
|
80
|
+
"GET /apache_pb.gif HTTP/1.0",
|
81
|
+
"200",
|
82
|
+
"2326"
|
83
|
+
};
|
84
|
+
assertThat(cols, is(expected));
|
85
|
+
}
|
86
|
+
);
|
87
|
+
|
88
|
+
}
|
89
|
+
|
55
90
|
@Test
|
56
91
|
public void test_combined() throws Exception {
|
57
92
|
tester.run("/yml/test_combined.yml");
|
data/src/test/java/org/embulk/tester/EmbulkPluginTester.java
CHANGED
@@ -8,7 +8,8 @@ import java.io.FileWriter;
|
|
8
8
|
import java.util.regex.Matcher;
|
9
9
|
import java.util.regex.Pattern;
|
10
10
|
|
11
|
-
import org.embulk.
|
11
|
+
import org.embulk.EmbulkEmbed;
|
12
|
+
import org.embulk.config.ConfigLoader;
|
12
13
|
|
13
14
|
public class EmbulkPluginTester {
|
14
15
|
|
@@ -19,8 +20,14 @@ public class EmbulkPluginTester {
|
|
19
20
|
|
20
21
|
public void run(String ymlPath) throws Exception
|
21
22
|
{
|
22
|
-
|
23
|
-
|
23
|
+
EmbulkEmbed.Bootstrap bootstrap = new EmbulkEmbed.Bootstrap();
|
24
|
+
|
25
|
+
EmbulkEmbed embulk = bootstrap.initialize();
|
26
|
+
|
27
|
+
ConfigLoader configLoader = new ConfigLoader(embulk.getModelManager());
|
28
|
+
embulk.run(configLoader.fromYamlFile(new File(convert(ymlPath))));
|
29
|
+
|
30
|
+
|
24
31
|
}
|
25
32
|
|
26
33
|
private String convert(String yml) throws Exception
|
data/src/test/resources/data/access_log_custom_time_format
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
127.0.0.1 - frank [2015.11.20 13:55:36] "GET /apache_pb.gif HTTP/1.0" 200 2326
|
data/src/test/resources/yml/test_custom_time_format.yml
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
in:
|
2
|
+
type: file
|
3
|
+
path_prefix: 'data/access_log_custom_time_format'
|
4
|
+
parser:
|
5
|
+
type: apache-log
|
6
|
+
format: '%h %l %u %{[%Y.%m.%d %T]}t \"%r\" %>s %b'
|
7
|
+
out:
|
8
|
+
type: file
|
9
|
+
path_prefix: '/temp/result_custom_time_format.'
|
10
|
+
file_ext: tsv
|
11
|
+
formatter:
|
12
|
+
type: csv
|
13
|
+
delimiter: "\t"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-apache-custom-log
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hiroyuki Sato
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-11-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +52,7 @@ files:
|
|
52
52
|
- LICENSE.txt
|
53
53
|
- README.md
|
54
54
|
- build.gradle
|
55
|
+
- circle.yml
|
55
56
|
- gradle/wrapper/gradle-wrapper.jar
|
56
57
|
- gradle/wrapper/gradle-wrapper.properties
|
57
58
|
- gradlew
|
@@ -66,6 +67,7 @@ files:
|
|
66
67
|
- src/main/java/org/embulk/parser/apache/log/LongLogElementFactory.java
|
67
68
|
- src/main/java/org/embulk/parser/apache/log/Patterns.java
|
68
69
|
- src/main/java/org/embulk/parser/apache/log/Replacement.java
|
70
|
+
- src/main/java/org/embulk/parser/apache/log/SimpleDateFormatTimestampLogElement.java
|
69
71
|
- src/main/java/org/embulk/parser/apache/log/StringLogElement.java
|
70
72
|
- src/main/java/org/embulk/parser/apache/log/StringLogElementFactory.java
|
71
73
|
- src/main/java/org/embulk/parser/apache/log/TimestampLogElement.java
|
@@ -82,12 +84,14 @@ files:
|
|
82
84
|
- src/test/resources/data/access_log_2_combined
|
83
85
|
- src/test/resources/data/access_log_combined
|
84
86
|
- src/test/resources/data/access_log_common
|
87
|
+
- src/test/resources/data/access_log_custom_time_format
|
85
88
|
- src/test/resources/resource.txt
|
86
89
|
- src/test/resources/temp/dummy
|
87
90
|
- src/test/resources/yml/test_combined.yml
|
88
91
|
- src/test/resources/yml/test_combined2.yml
|
89
92
|
- src/test/resources/yml/test_common.yml
|
90
|
-
- classpath/embulk-parser-apache-custom-log-0.3.0.jar
|
93
|
+
- src/test/resources/yml/test_custom_time_format.yml
|
94
|
+
- classpath/embulk-parser-apache-custom-log-0.4.0.jar
|
91
95
|
homepage: https://github.com/jami-i/embulk-parser-apache-custom-log
|
92
96
|
licenses:
|
93
97
|
- MIT
|