embulk-parser-apache-custom-log 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/CHANGES.md +9 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +76 -0
  6. data/build.gradle +74 -0
  7. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  8. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  9. data/gradlew +164 -0
  10. data/gradlew.bat +90 -0
  11. data/lib/embulk/guess/apache-custom-log.rb +61 -0
  12. data/lib/embulk/parser/apache-custom-log.rb +3 -0
  13. data/src/main/java/org/embulk/parser/ApacheCustomLogParserPlugin.java +109 -0
  14. data/src/main/java/org/embulk/parser/apache/log/LogElement.java +41 -0
  15. data/src/main/java/org/embulk/parser/apache/log/LogElementFactory.java +6 -0
  16. data/src/main/java/org/embulk/parser/apache/log/LogFormats.java +152 -0
  17. data/src/main/java/org/embulk/parser/apache/log/LongLogElement.java +29 -0
  18. data/src/main/java/org/embulk/parser/apache/log/LongLogElementFactory.java +30 -0
  19. data/src/main/java/org/embulk/parser/apache/log/Patterns.java +23 -0
  20. data/src/main/java/org/embulk/parser/apache/log/Replacement.java +27 -0
  21. data/src/main/java/org/embulk/parser/apache/log/StringLogElement.java +33 -0
  22. data/src/main/java/org/embulk/parser/apache/log/StringLogElementFactory.java +29 -0
  23. data/src/main/java/org/embulk/parser/apache/log/TimestampLogElement.java +42 -0
  24. data/src/main/java/org/embulk/parser/apache/log/TimestampLogElementFactory.java +24 -0
  25. data/src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java +162 -0
  26. data/src/test/java/org/embulk/parser/apache/log/LogFormatsTest.java +39 -0
  27. data/src/test/java/org/embulk/parser/apache/log/PatternsTest.java +120 -0
  28. data/src/test/java/org/embulk/parser/apache/log/StringLogElementFactoryTest.java +91 -0
  29. data/src/test/java/org/embulk/parser/apache/log/StringLogElementTest.java +51 -0
  30. data/src/test/java/org/embulk/tester/DummyConfigSource.java +86 -0
  31. data/src/test/java/org/embulk/tester/EmbulkPluginTester.java +52 -0
  32. data/src/test/java/org/embulk/tester/TestExtension.java +52 -0
  33. data/src/test/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  34. data/src/test/resources/data/access_log_2_combined +1 -0
  35. data/src/test/resources/data/access_log_combined +2 -0
  36. data/src/test/resources/data/access_log_common +1 -0
  37. data/src/test/resources/resource.txt +0 -0
  38. data/src/test/resources/temp/dummy +0 -0
  39. data/src/test/resources/yml/test_combined.yml +13 -0
  40. data/src/test/resources/yml/test_combined2.yml +13 -0
  41. data/src/test/resources/yml/test_common.yml +13 -0
  42. metadata +115 -0
@@ -0,0 +1,27 @@
1
+ package org.embulk.parser.apache.log;
2
+
3
+
4
+ public class Replacement {
5
+ private final int start;
6
+ private final int end;
7
+ private final LogElement<?> logElement;
8
+
9
+ public Replacement(int start, int end, LogElement<?> logElement) {
10
+ this.logElement = logElement;
11
+ this.end = end;
12
+ this.start = start;
13
+ }
14
+
15
+ public int getStart() {
16
+ return start;
17
+ }
18
+
19
+ public int getEnd() {
20
+ return end;
21
+ }
22
+
23
+ public LogElement<?> getLogElement() {
24
+ return logElement;
25
+ }
26
+
27
+ }
@@ -0,0 +1,33 @@
1
+ package org.embulk.parser.apache.log;
2
+
3
+
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.embulk.spi.type.Types;
6
+
7
+ public class StringLogElement extends LogElement<String> {
8
+
9
+ public StringLogElement(String name, String regex) {
10
+ super(name, regex, Types.STRING);
11
+ }
12
+
13
+ @Override
14
+ public String parse(String s) {
15
+ if("-".equals(s)){
16
+ return null;
17
+ }else{
18
+ return s;
19
+ }
20
+
21
+ }
22
+
23
+ @Override
24
+ public void setToPageBuilder(PageBuilder pageBuilder, int i, String value) {
25
+ String parsed = parse(value);
26
+ if(parsed != null){
27
+ pageBuilder.setString(i, parsed);
28
+ }else{
29
+ pageBuilder.setNull(i);
30
+ }
31
+
32
+ }
33
+ }
@@ -0,0 +1,29 @@
1
+ package org.embulk.parser.apache.log;
2
+
3
+
4
+ import org.apache.commons.lang3.StringUtils;
5
+
6
+ public class StringLogElementFactory implements LogElementFactory<StringLogElement>, Patterns {
7
+
8
+ private String name;
9
+ private String regexp;
10
+
11
+ public StringLogElementFactory(String name, String regexp) {
12
+ this.name = name;
13
+ this.regexp = regexp;
14
+ }
15
+
16
+ public StringLogElementFactory(String name) {
17
+ this.name = name;
18
+ this.regexp = ANY;
19
+ }
20
+
21
+ @Override
22
+ public StringLogElement create(String parameter) {
23
+ if(StringUtils.isEmpty(parameter)){
24
+ return new StringLogElement(name, regexp);
25
+ }else{
26
+ return new StringLogElement(name + "-" + parameter, regexp);
27
+ }
28
+ }
29
+ }
@@ -0,0 +1,42 @@
1
+ package org.embulk.parser.apache.log;
2
+
3
+ import org.embulk.spi.PageBuilder;
4
+ import org.embulk.spi.time.Timestamp;
5
+ import org.embulk.spi.time.TimestampParser;
6
+
7
+
8
+ import static org.embulk.spi.type.Types.TIMESTAMP;
9
+
10
+
11
+ public class TimestampLogElement extends LogElement<Timestamp> {
12
+
13
+ private final TimestampParser parser;
14
+
15
+ public TimestampLogElement(TimestampParser.Task task, String name, String regex) {
16
+ this(task, name, regex, "%d/%b/%Y:%T %z");
17
+ }
18
+
19
+ public TimestampLogElement(TimestampParser.Task task, String name, String regex, String pattern) {
20
+ super(name, regex, TIMESTAMP);
21
+ this.parser = new TimestampParser(task.getJRuby(), pattern, task.getDefaultTimeZone());
22
+ }
23
+
24
+ @Override
25
+ public Timestamp parse(String s) {
26
+ try{
27
+ return parser.parse(s);
28
+ }catch (Exception e){
29
+ return null;
30
+ }
31
+ }
32
+
33
+ @Override
34
+ public void setToPageBuilder(PageBuilder pageBuilder, int i, String value) {
35
+ Timestamp parse = parse(value);
36
+ if(parse != null){
37
+ pageBuilder.setTimestamp(i, parse);
38
+ }else{
39
+ pageBuilder.setNull(i);
40
+ }
41
+ }
42
+ }
@@ -0,0 +1,24 @@
1
+ package org.embulk.parser.apache.log;
2
+
3
+ import org.apache.commons.lang3.StringUtils;
4
+ import org.embulk.spi.time.TimestampParser;
5
+
6
+ public class TimestampLogElementFactory implements LogElementFactory<TimestampLogElement>, Patterns {
7
+
8
+ private TimestampParser.Task task;
9
+ private String name;
10
+
11
+ public TimestampLogElementFactory(TimestampParser.Task task, String name) {
12
+ this.task = task;
13
+ this.name = name;
14
+ }
15
+
16
+ @Override
17
+ public TimestampLogElement create(String parameter) {
18
+ if(StringUtils.isEmpty(parameter)){
19
+ return new TimestampLogElement(task, name, "\\[([^\\]]+)\\]");
20
+ }else{
21
+ return new TimestampLogElement(task, name, parameter);
22
+ }
23
+ }
24
+ }
@@ -0,0 +1,162 @@
1
+ package org.embulk.parser;
2
+
3
+ import org.embulk.spi.ParserPlugin;
4
+ import org.embulk.tester.EmbulkPluginTester;
5
+ import org.junit.Test;
6
+
7
+ import java.io.BufferedReader;
8
+ import java.io.File;
9
+ import java.io.FileReader;
10
+ import java.io.IOException;
11
+ import java.net.URISyntaxException;
12
+ import java.util.function.Consumer;
13
+
14
+ import static org.hamcrest.CoreMatchers.is;
15
+ import static org.junit.Assert.assertThat;
16
+
17
+ public class TestApacheLogParserPlugin {
18
+
19
+ private static EmbulkPluginTester tester = new EmbulkPluginTester(ParserPlugin.class, "apache-log", ApacheLogParserPlugin.class);
20
+
21
+ @Test
22
+ public void test_common() throws Exception {
23
+ tester.run("/yml/test_common.yml");
24
+
25
+ assertResult(
26
+ "/temp/result_common.000.00.tsv",
27
+ cols -> {
28
+ String[] expected = new String[]{
29
+ "remote-host",
30
+ "remote-log-name",
31
+ "request-user",
32
+ "request-time",
33
+ "request-line",
34
+ "response-status",
35
+ "response-bytes"
36
+ };
37
+ assertThat(cols, is(expected));
38
+ },
39
+ cols -> {
40
+ String[] expected = new String[]{
41
+ "127.0.0.1",
42
+ "",
43
+ "frank",
44
+ "2000-10-10 20:55:36.000000 +0000",
45
+ "GET /apache_pb.gif HTTP/1.0",
46
+ "200",
47
+ "2326"
48
+ };
49
+ assertThat(cols, is(expected));
50
+ }
51
+ );
52
+
53
+ }
54
+
55
+ @Test
56
+ public void test_combined() throws Exception {
57
+ tester.run("/yml/test_combined.yml");
58
+
59
+ assertResult(
60
+ "/temp/result_combined.000.00.tsv",
61
+ cols -> {
62
+ String[] expected = new String[]{
63
+ "remote-host",
64
+ "remote-log-name",
65
+ "request-user",
66
+ "request-time",
67
+ "request-line",
68
+ "response-status",
69
+ "response-bytes",
70
+ "request-header-Referer",
71
+ "request-header-User-agent"
72
+ };
73
+ assertThat(cols, is(expected));
74
+ },
75
+ cols -> {
76
+ String[] expected = new String[]{
77
+ "127.0.0.1",
78
+ "",
79
+ "frank",
80
+ "2000-10-10 20:55:36.000000 +0000",
81
+ "GET /apache_pb.gif HTTP/1.0",
82
+ "200",
83
+ "2326",
84
+ "http://www.example.com/start.html",
85
+ "Mozilla/4.08 [en] (Win98; I ;Nav)"
86
+ };
87
+ assertThat(cols, is(expected));
88
+ }
89
+ );
90
+
91
+ }
92
+
93
+ @Test
94
+ public void test_combined2() throws Exception {
95
+ tester.run("/yml/test_combined2.yml");
96
+
97
+ assertResult(
98
+ "/temp/result_2_combined.000.00.tsv",
99
+ cols -> {
100
+ String[] expected = new String[]{
101
+ "remote-host",
102
+ "remote-log-name",
103
+ "request-user",
104
+ "request-time",
105
+ "request-method",
106
+ "request-path",
107
+ "request-query",
108
+ "request-protocol",
109
+ "response-status",
110
+ "response-bytes",
111
+ "request-header-Referer",
112
+ "request-header-User-agent"
113
+ };
114
+ assertThat(cols, is(expected));
115
+ },
116
+ cols -> {
117
+ String[] expected = new String[]{
118
+ "24.93.39.209",
119
+ "",
120
+ "",
121
+ "2015-07-25 06:31:32.000000 +0000",
122
+ "POST",
123
+ "/search/",
124
+ "?c=Computers",
125
+ "HTTP/1.1",
126
+ "200",
127
+ "88",
128
+ "/category/health",
129
+ "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; WOW64; Trident/4.0; GTB6; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30618; .NET4.0C)"
130
+ };
131
+ assertThat(cols, is(expected));
132
+ }
133
+ );
134
+
135
+ }
136
+
137
+ @SafeVarargs
138
+ private final void assertResult(String path, Consumer<String[]> headerAssert, Consumer<String[]>... bodyHeadAsserts) throws URISyntaxException, IOException {
139
+
140
+ File resultFile = new File(TestApacheLogParserPlugin.class.getResource(path).toURI());
141
+
142
+ try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
143
+
144
+ String[] headerLine = reader.readLine().split("\t");
145
+
146
+ for (Consumer<String[]> bodyHeadAssert : bodyHeadAsserts) {
147
+ String[] bodyHeadLine = reader.readLine().split("\t");
148
+
149
+ assertThat("body column length mismatch.", bodyHeadLine.length, is(headerLine.length));
150
+
151
+ headerAssert.accept(headerLine);
152
+
153
+ bodyHeadAssert.accept(bodyHeadLine);
154
+ }
155
+
156
+ }
157
+
158
+
159
+ }
160
+
161
+
162
+ }
@@ -0,0 +1,39 @@
1
+ package org.embulk.parser.apache.log;
2
+
3
+ import junit.framework.TestCase;
4
+ import org.embulk.spi.time.TimestampParser;
5
+ import org.joda.time.DateTimeZone;
6
+ import org.jruby.embed.ScriptingContainer;
7
+
8
+ import java.util.regex.Pattern;
9
+
10
+ public class LogFormatsTest extends TestCase {
11
+
12
+ public void testLogFormat2Regexp() throws Exception {
13
+
14
+ String format = "%!100<v %!100,200<v %100,200,300>v %!100,200,300>{hogeHoge}v %v %{X-Forwarded-For}i %t %{%D}t %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O %D";
15
+
16
+ LogFormats logFormats = new LogFormats(new TimestampParser.Task() {
17
+ @Override
18
+ public DateTimeZone getDefaultTimeZone() {
19
+ return DateTimeZone.UTC;
20
+ }
21
+
22
+ @Override
23
+ public String getDefaultTimestampFormat() {
24
+ return "\"%Y-%m-%d %H:%M:%S.%N %z\"";
25
+ }
26
+
27
+ @Override
28
+ public ScriptingContainer getJRuby() {
29
+ return new ScriptingContainer();
30
+ }
31
+ });
32
+
33
+ String s = logFormats.logFormat2RegexpString(format);
34
+
35
+ System.out.println(Pattern.quote(s));
36
+
37
+
38
+ }
39
+ }
@@ -0,0 +1,120 @@
1
+ package org.embulk.parser.apache.log;
2
+
3
+ import junit.framework.TestCase;
4
+ import org.junit.Test;
5
+ import org.junit.experimental.runners.Enclosed;
6
+ import org.junit.runner.RunWith;
7
+
8
+ import java.util.regex.Pattern;
9
+
10
+ import static org.hamcrest.CoreMatchers.is;
11
+ import static org.junit.Assert.assertThat;
12
+
13
+ @RunWith(Enclosed.class)
14
+ public class PatternsTest extends TestCase {
15
+
16
+ public static class Test_IP_ADDRESS {
17
+ Pattern pattern = Pattern.compile(Patterns.IP_ADDRESS);
18
+ @Test
19
+ public void test_match_with_ipv4(){
20
+ assertThat(pattern.matcher("127.0.0.1").matches(), is(true));
21
+ assertThat(pattern.matcher("255.255.255.255").matches(), is(true));
22
+ }
23
+ }
24
+
25
+ public static class Test_LONG {
26
+ Pattern pattern = Pattern.compile(Patterns.LONG);
27
+ @Test
28
+ public void test_match_with_long(){
29
+ assertThat(pattern.matcher("1").matches(), is(true));
30
+ assertThat(pattern.matcher("-1").matches(), is(true));
31
+ assertThat(pattern.matcher("-").matches(), is(true));
32
+
33
+ assertThat(pattern.matcher("a").matches(), is(false));
34
+ }
35
+ }
36
+
37
+ public static class Test_ANY {
38
+ Pattern pattern = Pattern.compile(Patterns.ANY);
39
+ @Test
40
+ public void test_match_with_long(){
41
+ assertThat(pattern.matcher("1").matches(), is(true));
42
+ assertThat(pattern.matcher("-1").matches(), is(true));
43
+ assertThat(pattern.matcher("-").matches(), is(true));
44
+
45
+ assertThat(pattern.matcher("a").matches(), is(true));
46
+ assertThat(pattern.matcher("").matches(), is(true));
47
+ }
48
+ }
49
+
50
+ public static class Test_PATH {
51
+ Pattern pattern = Pattern.compile(Patterns.PATH);
52
+ @Test
53
+ public void test_match_with_long(){
54
+ assertThat(pattern.matcher("/1").matches(), is(true));
55
+ assertThat(pattern.matcher("/abc/123").matches(), is(true));
56
+
57
+ assertThat(pattern.matcher("").matches(), is(false));
58
+ }
59
+ }
60
+
61
+ public static class Test_QUERY {
62
+ Pattern pattern = Pattern.compile(Patterns.QUERY);
63
+ @Test
64
+ public void test_match_with_long(){
65
+ assertThat(pattern.matcher("?1").matches(), is(true));
66
+ assertThat(pattern.matcher("?abc=123&p=v#hash").matches(), is(true));
67
+
68
+ assertThat(pattern.matcher("").matches(), is(true));
69
+ }
70
+ }
71
+
72
+ public static class Test_STATUS {
73
+ Pattern pattern = Pattern.compile(Patterns.STATUS);
74
+ @Test
75
+ public void test_match_with_long(){
76
+ assertThat(pattern.matcher("100").matches(), is(true));
77
+ assertThat(pattern.matcher("200").matches(), is(true));
78
+ assertThat(pattern.matcher("302").matches(), is(true));
79
+ assertThat(pattern.matcher("404").matches(), is(true));
80
+ assertThat(pattern.matcher("500").matches(), is(true));
81
+ assertThat(pattern.matcher("999").matches(), is(true));
82
+
83
+ assertThat(pattern.matcher("99").matches(), is(false));
84
+ assertThat(pattern.matcher("099").matches(), is(false));
85
+ assertThat(pattern.matcher("1000").matches(), is(false));
86
+ }
87
+ }
88
+
89
+ public static class Test_METHOD {
90
+ Pattern pattern = Pattern.compile(Patterns.METHOD);
91
+ @Test
92
+ public void test_match_with_long(){
93
+ assertThat(pattern.matcher("HEAD").matches(), is(true));
94
+ assertThat(pattern.matcher("GET").matches(), is(true));
95
+ assertThat(pattern.matcher("POST").matches(), is(true));
96
+ assertThat(pattern.matcher("PUT").matches(), is(true));
97
+ assertThat(pattern.matcher("OPTIONS").matches(), is(true));
98
+ assertThat(pattern.matcher("TRACE").matches(), is(true));
99
+ assertThat(pattern.matcher("CONNECT").matches(), is(true));
100
+
101
+ assertThat(pattern.matcher("").matches(), is(false));
102
+ assertThat(pattern.matcher("OTHER").matches(), is(false));
103
+ }
104
+ }
105
+
106
+ public static class Test_CONN_STATUS {
107
+ Pattern pattern = Pattern.compile(Patterns.CONN_STATUS);
108
+ @Test
109
+ public void test_match_with_long(){
110
+ assertThat(pattern.matcher("X").matches(), is(true));
111
+ assertThat(pattern.matcher("-").matches(), is(true));
112
+ assertThat(pattern.matcher("+").matches(), is(true));
113
+
114
+ assertThat(pattern.matcher("").matches(), is(false));
115
+ assertThat(pattern.matcher("foo").matches(), is(false));
116
+ }
117
+ }
118
+
119
+
120
+ }