embulk-parser-apache-custom-log 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/CHANGES.md +9 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +76 -0
  6. data/build.gradle +74 -0
  7. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  8. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  9. data/gradlew +164 -0
  10. data/gradlew.bat +90 -0
  11. data/lib/embulk/guess/apache-custom-log.rb +61 -0
  12. data/lib/embulk/parser/apache-custom-log.rb +3 -0
  13. data/src/main/java/org/embulk/parser/ApacheCustomLogParserPlugin.java +109 -0
  14. data/src/main/java/org/embulk/parser/apache/log/LogElement.java +41 -0
  15. data/src/main/java/org/embulk/parser/apache/log/LogElementFactory.java +6 -0
  16. data/src/main/java/org/embulk/parser/apache/log/LogFormats.java +152 -0
  17. data/src/main/java/org/embulk/parser/apache/log/LongLogElement.java +29 -0
  18. data/src/main/java/org/embulk/parser/apache/log/LongLogElementFactory.java +30 -0
  19. data/src/main/java/org/embulk/parser/apache/log/Patterns.java +23 -0
  20. data/src/main/java/org/embulk/parser/apache/log/Replacement.java +27 -0
  21. data/src/main/java/org/embulk/parser/apache/log/StringLogElement.java +33 -0
  22. data/src/main/java/org/embulk/parser/apache/log/StringLogElementFactory.java +29 -0
  23. data/src/main/java/org/embulk/parser/apache/log/TimestampLogElement.java +42 -0
  24. data/src/main/java/org/embulk/parser/apache/log/TimestampLogElementFactory.java +24 -0
  25. data/src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java +162 -0
  26. data/src/test/java/org/embulk/parser/apache/log/LogFormatsTest.java +39 -0
  27. data/src/test/java/org/embulk/parser/apache/log/PatternsTest.java +120 -0
  28. data/src/test/java/org/embulk/parser/apache/log/StringLogElementFactoryTest.java +91 -0
  29. data/src/test/java/org/embulk/parser/apache/log/StringLogElementTest.java +51 -0
  30. data/src/test/java/org/embulk/tester/DummyConfigSource.java +86 -0
  31. data/src/test/java/org/embulk/tester/EmbulkPluginTester.java +52 -0
  32. data/src/test/java/org/embulk/tester/TestExtension.java +52 -0
  33. data/src/test/resources/META-INF/services/org.embulk.spi.Extension +1 -0
  34. data/src/test/resources/data/access_log_2_combined +1 -0
  35. data/src/test/resources/data/access_log_combined +2 -0
  36. data/src/test/resources/data/access_log_common +1 -0
  37. data/src/test/resources/resource.txt +0 -0
  38. data/src/test/resources/temp/dummy +0 -0
  39. data/src/test/resources/yml/test_combined.yml +13 -0
  40. data/src/test/resources/yml/test_combined2.yml +13 -0
  41. data/src/test/resources/yml/test_common.yml +13 -0
  42. metadata +115 -0
@@ -0,0 +1,91 @@
1
+ package org.embulk.parser.apache.log;
2
+
3
+ import junit.framework.TestCase;
4
+ import org.junit.Test;
5
+ import org.junit.experimental.runners.Enclosed;
6
+ import org.junit.runner.RunWith;
7
+
8
+ import static org.hamcrest.CoreMatchers.is;
9
+ import static org.junit.Assert.assertThat;
10
+
11
+ @RunWith(Enclosed.class)
12
+ public class StringLogElementFactoryTest extends TestCase {
13
+
14
+ public static class WithName{
15
+
16
+ @Test
17
+ public void testCreateWithNull() throws Exception {
18
+
19
+ StringLogElementFactory factory = new StringLogElementFactory("test-name");
20
+
21
+ StringLogElement logElement = factory.create(null);
22
+
23
+ assertThat(logElement.getName(), is("test-name"));
24
+ assertThat(logElement.getRegexp(), is("(.*)"));
25
+
26
+ }
27
+
28
+ @Test
29
+ public void testCreateWithEmptyString() throws Exception {
30
+
31
+ StringLogElementFactory factory = new StringLogElementFactory("test-name");
32
+
33
+ StringLogElement logElement = factory.create(null);
34
+
35
+ assertThat(logElement.getName(), is("test-name"));
36
+ assertThat(logElement.getRegexp(), is("(.*)"));
37
+
38
+ }
39
+
40
+ @Test
41
+ public void testCreateWithParameter() throws Exception {
42
+
43
+ StringLogElementFactory factory = new StringLogElementFactory("test-name");
44
+
45
+ StringLogElement logElement = factory.create("param");
46
+
47
+ assertThat(logElement.getName(), is("test-name-param"));
48
+ assertThat(logElement.getRegexp(), is("(.*)"));
49
+
50
+ }
51
+ }
52
+
53
+ public static class WithNameAndRegexp{
54
+
55
+ @Test
56
+ public void testCreateWithNull() throws Exception {
57
+
58
+ StringLogElementFactory factory = new StringLogElementFactory("test-name", "(.+)");
59
+
60
+ StringLogElement logElement = factory.create(null);
61
+
62
+ assertThat(logElement.getName(), is("test-name"));
63
+ assertThat(logElement.getRegexp(), is("(.+)"));
64
+
65
+ }
66
+
67
+ @Test
68
+ public void testCreateWithEmptyString() throws Exception {
69
+
70
+ StringLogElementFactory factory = new StringLogElementFactory("test-name", "(.+)");
71
+
72
+ StringLogElement logElement = factory.create(null);
73
+
74
+ assertThat(logElement.getName(), is("test-name"));
75
+ assertThat(logElement.getRegexp(), is("(.+)"));
76
+
77
+ }
78
+
79
+ @Test
80
+ public void testCreateWithParameter() throws Exception {
81
+
82
+ StringLogElementFactory factory = new StringLogElementFactory("test-name", "(.+)");
83
+
84
+ StringLogElement logElement = factory.create("param");
85
+
86
+ assertThat(logElement.getName(), is("test-name-param"));
87
+ assertThat(logElement.getRegexp(), is("(.+)"));
88
+
89
+ }
90
+ }
91
+ }
@@ -0,0 +1,51 @@
1
+ package org.embulk.parser.apache.log;
2
+
3
+ import junit.framework.TestCase;
4
+ import org.embulk.spi.PageBuilder;
5
+ import org.junit.Test;
6
+ import org.junit.experimental.runners.Enclosed;
7
+ import org.junit.runner.RunWith;
8
+
9
+ import static org.hamcrest.CoreMatchers.is;
10
+ import static org.junit.Assert.assertThat;
11
+
12
+ @RunWith(Enclosed.class)
13
+ public class StringLogElementTest extends TestCase {
14
+
15
+ public static class TestParse {
16
+
17
+ StringLogElement elem = new StringLogElement("test-elem", "(.*)");
18
+
19
+ @Test
20
+ public void testParseWithNull() throws Exception {
21
+ assertThat(elem.parse(null), is((String)null));
22
+ }
23
+
24
+ @Test
25
+ public void testParseWithEmpty() throws Exception {
26
+ assertThat(elem.parse(""), is(""));
27
+ }
28
+
29
+ @Test
30
+ public void testParseWithNonEmptyString() throws Exception {
31
+ assertThat(elem.parse("str"), is("str"));
32
+ }
33
+
34
+ @Test
35
+ public void testParseWithCLFEmptyString() throws Exception {
36
+ assertThat(elem.parse("-"), is((String)null));
37
+ }
38
+
39
+ }
40
+
41
+ public static class TestSetToPageBuilder{
42
+ @Test
43
+ public void testSetToPageBuilder() throws Exception {
44
+ //TODO implement
45
+ }
46
+ }
47
+
48
+
49
+
50
+
51
+ }
@@ -0,0 +1,86 @@
1
+ package org.embulk.tester;
2
+
3
+ import com.fasterxml.jackson.databind.JsonNode;
4
+ import com.fasterxml.jackson.databind.node.ObjectNode;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.config.DataSource;
7
+
8
+ import java.util.List;
9
+ import java.util.Map;
10
+
11
+ public class DummyConfigSource implements ConfigSource {
12
+ @Override
13
+ public <T> T loadConfig(Class<T> taskType) {
14
+ return null;
15
+ }
16
+
17
+ @Override
18
+ public List<String> getAttributeNames() {
19
+ return null;
20
+ }
21
+
22
+ @Override
23
+ public Iterable<Map.Entry<String, JsonNode>> getAttributes() {
24
+ return null;
25
+ }
26
+
27
+ @Override
28
+ public boolean isEmpty() {
29
+ return false;
30
+ }
31
+
32
+ @Override
33
+ public <E> E get(Class<E> type, String attrName) {
34
+ return null;
35
+ }
36
+
37
+ @Override
38
+ public <E> E get(Class<E> type, String attrName, E defaultValue) {
39
+ return null;
40
+ }
41
+
42
+ @Override
43
+ public ConfigSource getNested(String attrName) {
44
+ return null;
45
+ }
46
+
47
+ @Override
48
+ public ConfigSource getNestedOrSetEmpty(String attrName) {
49
+ return null;
50
+ }
51
+
52
+ @Override
53
+ public ConfigSource set(String attrName, Object v) {
54
+ return null;
55
+ }
56
+
57
+ @Override
58
+ public ConfigSource setNested(String attrName, DataSource v) {
59
+ return null;
60
+ }
61
+
62
+ @Override
63
+ public ConfigSource setAll(DataSource other) {
64
+ return null;
65
+ }
66
+
67
+ @Override
68
+ public ConfigSource remove(String attrName) {
69
+ return null;
70
+ }
71
+
72
+ @Override
73
+ public ConfigSource deepCopy() {
74
+ return null;
75
+ }
76
+
77
+ @Override
78
+ public ConfigSource merge(DataSource other) {
79
+ return null;
80
+ }
81
+
82
+ @Override
83
+ public ObjectNode getObjectNode() {
84
+ return null;
85
+ }
86
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.tester;
2
+
3
+ import java.io.BufferedReader;
4
+ import java.io.BufferedWriter;
5
+ import java.io.File;
6
+ import java.io.FileReader;
7
+ import java.io.FileWriter;
8
+ import java.util.regex.Matcher;
9
+ import java.util.regex.Pattern;
10
+
11
+ import org.embulk.command.Runner;
12
+
13
+ public class EmbulkPluginTester {
14
+
15
+ public EmbulkPluginTester(Class<?> iface, String name, Class<?> impl)
16
+ {
17
+ TestExtension.addPlugin(iface, name, impl);
18
+ }
19
+
20
+ public void run(String ymlPath) throws Exception
21
+ {
22
+ Runner runner = new Runner("{}");
23
+ runner.run(convert(ymlPath));
24
+ }
25
+
26
+ private String convert(String yml) throws Exception
27
+ {
28
+ File rootPath = new File(EmbulkPluginTester.class.getResource("/resource.txt").toURI()).getParentFile();
29
+ File ymlPath = new File(EmbulkPluginTester.class.getResource(yml).toURI());
30
+ File tempYmlPath = new File(ymlPath.getParentFile(), "temp-" + ymlPath.getName());
31
+ Pattern pathPrefixPattern = Pattern.compile("^ *path(_prefix)?: '(.*)'$");
32
+ try (BufferedReader reader = new BufferedReader(new FileReader(ymlPath))) {
33
+ try (BufferedWriter writer = new BufferedWriter(new FileWriter(tempYmlPath))) {
34
+ String line;
35
+ while ((line = reader.readLine()) != null) {
36
+ Matcher matcher = pathPrefixPattern.matcher(line);
37
+ if (matcher.matches()) {
38
+ int group = 2;
39
+ writer.write(line.substring(0, matcher.start(group)));
40
+ writer.write(new File(rootPath, matcher.group(group)).getAbsolutePath());
41
+ writer.write(line.substring(matcher.end(group)));
42
+ } else {
43
+ writer.write(line);
44
+ }
45
+ writer.newLine();
46
+ }
47
+ }
48
+ }
49
+ return tempYmlPath.getAbsolutePath();
50
+ }
51
+
52
+ }
@@ -0,0 +1,52 @@
1
+ package org.embulk.tester;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+
6
+ import org.embulk.config.ConfigSource;
7
+ import org.embulk.plugin.InjectedPluginSource;
8
+ import org.embulk.spi.Extension;
9
+
10
+ import com.google.common.collect.ImmutableList;
11
+ import com.google.inject.Binder;
12
+ import com.google.inject.Module;
13
+
14
+
15
+ public class TestExtension implements Extension
16
+ {
17
+ private static class PluginDefinition
18
+ {
19
+ public final Class<?> iface;
20
+ public final String name;
21
+ public final Class<?> impl;
22
+
23
+ public PluginDefinition(Class<?> iface, String name, Class<?> impl)
24
+ {
25
+ this.iface = iface;
26
+ this.name = name;
27
+ this.impl = impl;
28
+ }
29
+ }
30
+
31
+ private static List<PluginDefinition> plugins = new ArrayList<>();
32
+
33
+ public static void addPlugin(Class<?> iface, String name, Class<?> impl)
34
+ {
35
+ plugins.add(new PluginDefinition(iface, name, impl));
36
+ }
37
+
38
+ @Override
39
+ public List<Module> getModules(ConfigSource configsource) {
40
+ Module module = new Module() {
41
+
42
+ @Override
43
+ public void configure(Binder binder) {
44
+ for (PluginDefinition plugin : plugins) {
45
+ InjectedPluginSource.registerPluginTo(binder, plugin.iface, plugin.name, plugin.impl);
46
+ }
47
+ }
48
+ };
49
+ return ImmutableList.of(module);
50
+ }
51
+
52
+ }
@@ -0,0 +1 @@
1
+ org.embulk.tester.TestExtension
@@ -0,0 +1 @@
1
+ 24.93.39.209 - - [25/Jul/2015:15:31:32 +0900] "POST /search/?c=Computers HTTP/1.1" 200 88 "/category/health" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; WOW64; Trident/4.0; GTB6; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30618; .NET4.0C)"
@@ -0,0 +1,2 @@
1
+ 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"
2
+
@@ -0,0 +1 @@
1
+ 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326
File without changes
File without changes
@@ -0,0 +1,13 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/access_log_combined'
4
+ parser:
5
+ type: apache-log
6
+ format: '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"'
7
+ out:
8
+ type: file
9
+ path_prefix: '/temp/result_combined.'
10
+ file_ext: tsv
11
+ formatter:
12
+ type: csv
13
+ delimiter: "\t"
@@ -0,0 +1,13 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/access_log_2_combined'
4
+ parser:
5
+ type: apache-log
6
+ format: '%h %l %u %t \"%m %U%q %H\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"'
7
+ out:
8
+ type: file
9
+ path_prefix: '/temp/result_2_combined.'
10
+ file_ext: tsv
11
+ formatter:
12
+ type: csv
13
+ delimiter: "\t"
@@ -0,0 +1,13 @@
1
+ in:
2
+ type: file
3
+ path_prefix: 'data/access_log_common'
4
+ parser:
5
+ type: apache-log
6
+ format: '%h %l %u %t \"%r\" %>s %b'
7
+ out:
8
+ type: file
9
+ path_prefix: '/temp/result_common.'
10
+ file_ext: tsv
11
+ formatter:
12
+ type: csv
13
+ delimiter: "\t"
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-parser-apache-custom-log
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Hiroyuki Sato
8
+ - Osamu Ishikawa
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-10-01 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ name: bundler
21
+ prerelease: false
22
+ type: :development
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ version: '1.0'
28
+ - !ruby/object:Gem::Dependency
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ name: rake
35
+ prerelease: false
36
+ type: :development
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '10.0'
42
+ description: Parses Apache Custom Log files read by other file input plugins.
43
+ email:
44
+ - hiroysato@gmail.com
45
+ - bass.duo@gmail.com
46
+ executables: []
47
+ extensions: []
48
+ extra_rdoc_files: []
49
+ files:
50
+ - .gitignore
51
+ - CHANGES.md
52
+ - LICENSE.txt
53
+ - README.md
54
+ - build.gradle
55
+ - gradle/wrapper/gradle-wrapper.jar
56
+ - gradle/wrapper/gradle-wrapper.properties
57
+ - gradlew
58
+ - gradlew.bat
59
+ - lib/embulk/guess/apache-custom-log.rb
60
+ - lib/embulk/parser/apache-custom-log.rb
61
+ - src/main/java/org/embulk/parser/ApacheCustomLogParserPlugin.java
62
+ - src/main/java/org/embulk/parser/apache/log/LogElement.java
63
+ - src/main/java/org/embulk/parser/apache/log/LogElementFactory.java
64
+ - src/main/java/org/embulk/parser/apache/log/LogFormats.java
65
+ - src/main/java/org/embulk/parser/apache/log/LongLogElement.java
66
+ - src/main/java/org/embulk/parser/apache/log/LongLogElementFactory.java
67
+ - src/main/java/org/embulk/parser/apache/log/Patterns.java
68
+ - src/main/java/org/embulk/parser/apache/log/Replacement.java
69
+ - src/main/java/org/embulk/parser/apache/log/StringLogElement.java
70
+ - src/main/java/org/embulk/parser/apache/log/StringLogElementFactory.java
71
+ - src/main/java/org/embulk/parser/apache/log/TimestampLogElement.java
72
+ - src/main/java/org/embulk/parser/apache/log/TimestampLogElementFactory.java
73
+ - src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java
74
+ - src/test/java/org/embulk/parser/apache/log/LogFormatsTest.java
75
+ - src/test/java/org/embulk/parser/apache/log/PatternsTest.java
76
+ - src/test/java/org/embulk/parser/apache/log/StringLogElementFactoryTest.java
77
+ - src/test/java/org/embulk/parser/apache/log/StringLogElementTest.java
78
+ - src/test/java/org/embulk/tester/DummyConfigSource.java
79
+ - src/test/java/org/embulk/tester/EmbulkPluginTester.java
80
+ - src/test/java/org/embulk/tester/TestExtension.java
81
+ - src/test/resources/META-INF/services/org.embulk.spi.Extension
82
+ - src/test/resources/data/access_log_2_combined
83
+ - src/test/resources/data/access_log_combined
84
+ - src/test/resources/data/access_log_common
85
+ - src/test/resources/resource.txt
86
+ - src/test/resources/temp/dummy
87
+ - src/test/resources/yml/test_combined.yml
88
+ - src/test/resources/yml/test_combined2.yml
89
+ - src/test/resources/yml/test_common.yml
90
+ - classpath/embulk-parser-apache-custom-log-0.2.0.jar
91
+ homepage: https://github.com/jami-i/embulk-parser-apache-custom-log
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.1.9
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Apache Custom Log parser plugin for Embulk
115
+ test_files: []