embulk-parser-apache-log 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +4 -0
- data/README.md +2 -5
- data/build.gradle +32 -3
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/example/config.yml +9 -0
- data/example/sample.log +5000 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +1 -2
- data/gradlew +43 -35
- data/src/main/java/org/embulk/parser/ApacheLogParserPlugin.java +28 -17
- metadata +7 -3
Binary file
|
@@ -1,6 +1,5 @@
|
|
1
|
-
#Wed Feb 04 13:46:12 PST 2015
|
2
1
|
distributionBase=GRADLE_USER_HOME
|
3
2
|
distributionPath=wrapper/dists
|
4
3
|
zipStoreBase=GRADLE_USER_HOME
|
5
4
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-
|
5
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-bin.zip
|
data/gradlew
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env bash
|
1
|
+
#!/usr/bin/env sh
|
2
2
|
|
3
3
|
##############################################################################
|
4
4
|
##
|
@@ -6,20 +6,38 @@
|
|
6
6
|
##
|
7
7
|
##############################################################################
|
8
8
|
|
9
|
-
#
|
10
|
-
|
9
|
+
# Attempt to set APP_HOME
|
10
|
+
# Resolve links: $0 may be a link
|
11
|
+
PRG="$0"
|
12
|
+
# Need this for relative symlinks.
|
13
|
+
while [ -h "$PRG" ] ; do
|
14
|
+
ls=`ls -ld "$PRG"`
|
15
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
16
|
+
if expr "$link" : '/.*' > /dev/null; then
|
17
|
+
PRG="$link"
|
18
|
+
else
|
19
|
+
PRG=`dirname "$PRG"`"/$link"
|
20
|
+
fi
|
21
|
+
done
|
22
|
+
SAVED="`pwd`"
|
23
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
24
|
+
APP_HOME="`pwd -P`"
|
25
|
+
cd "$SAVED" >/dev/null
|
11
26
|
|
12
27
|
APP_NAME="Gradle"
|
13
28
|
APP_BASE_NAME=`basename "$0"`
|
14
29
|
|
30
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
31
|
+
DEFAULT_JVM_OPTS=""
|
32
|
+
|
15
33
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
34
|
MAX_FD="maximum"
|
17
35
|
|
18
|
-
warn ( ) {
|
36
|
+
warn () {
|
19
37
|
echo "$*"
|
20
38
|
}
|
21
39
|
|
22
|
-
die ( ) {
|
40
|
+
die () {
|
23
41
|
echo
|
24
42
|
echo "$*"
|
25
43
|
echo
|
@@ -30,6 +48,7 @@ die ( ) {
|
|
30
48
|
cygwin=false
|
31
49
|
msys=false
|
32
50
|
darwin=false
|
51
|
+
nonstop=false
|
33
52
|
case "`uname`" in
|
34
53
|
CYGWIN* )
|
35
54
|
cygwin=true
|
@@ -40,31 +59,11 @@ case "`uname`" in
|
|
40
59
|
MINGW* )
|
41
60
|
msys=true
|
42
61
|
;;
|
62
|
+
NONSTOP* )
|
63
|
+
nonstop=true
|
64
|
+
;;
|
43
65
|
esac
|
44
66
|
|
45
|
-
# For Cygwin, ensure paths are in UNIX format before anything is touched.
|
46
|
-
if $cygwin ; then
|
47
|
-
[ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
|
48
|
-
fi
|
49
|
-
|
50
|
-
# Attempt to set APP_HOME
|
51
|
-
# Resolve links: $0 may be a link
|
52
|
-
PRG="$0"
|
53
|
-
# Need this for relative symlinks.
|
54
|
-
while [ -h "$PRG" ] ; do
|
55
|
-
ls=`ls -ld "$PRG"`
|
56
|
-
link=`expr "$ls" : '.*-> \(.*\)$'`
|
57
|
-
if expr "$link" : '/.*' > /dev/null; then
|
58
|
-
PRG="$link"
|
59
|
-
else
|
60
|
-
PRG=`dirname "$PRG"`"/$link"
|
61
|
-
fi
|
62
|
-
done
|
63
|
-
SAVED="`pwd`"
|
64
|
-
cd "`dirname \"$PRG\"`/" >&-
|
65
|
-
APP_HOME="`pwd -P`"
|
66
|
-
cd "$SAVED" >&-
|
67
|
-
|
68
67
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
69
68
|
|
70
69
|
# Determine the Java command to use to start the JVM.
|
@@ -90,7 +89,7 @@ location of your Java installation."
|
|
90
89
|
fi
|
91
90
|
|
92
91
|
# Increase the maximum file descriptors if we can.
|
93
|
-
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
92
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
94
93
|
MAX_FD_LIMIT=`ulimit -H -n`
|
95
94
|
if [ $? -eq 0 ] ; then
|
96
95
|
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
@@ -114,6 +113,7 @@ fi
|
|
114
113
|
if $cygwin ; then
|
115
114
|
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
116
115
|
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
116
|
+
JAVACMD=`cygpath --unix "$JAVACMD"`
|
117
117
|
|
118
118
|
# We build the pattern for arguments to be converted via cygpath
|
119
119
|
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
@@ -154,11 +154,19 @@ if $cygwin ; then
|
|
154
154
|
esac
|
155
155
|
fi
|
156
156
|
|
157
|
-
#
|
158
|
-
|
159
|
-
|
157
|
+
# Escape application args
|
158
|
+
save () {
|
159
|
+
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
160
|
+
echo " "
|
160
161
|
}
|
161
|
-
|
162
|
-
|
162
|
+
APP_ARGS=$(save "$@")
|
163
|
+
|
164
|
+
# Collect all arguments for the java command, following the shell quoting and substitution rules
|
165
|
+
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
|
166
|
+
|
167
|
+
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
168
|
+
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
|
169
|
+
cd "$(dirname "$0")"
|
170
|
+
fi
|
163
171
|
|
164
|
-
exec "$JAVACMD" "
|
172
|
+
exec "$JAVACMD" "$@"
|
@@ -51,7 +51,7 @@ public class ApacheLogParserPlugin
|
|
51
51
|
}
|
52
52
|
}
|
53
53
|
public interface PluginTask
|
54
|
-
extends Task, LineDecoder.DecoderTask, TimestampParser.
|
54
|
+
extends Task, LineDecoder.DecoderTask, TimestampParser.Task
|
55
55
|
{
|
56
56
|
|
57
57
|
@Config("format")
|
@@ -67,26 +67,29 @@ public class ApacheLogParserPlugin
|
|
67
67
|
ArrayList<ColumnConfig> columns = new ArrayList<ColumnConfig>();
|
68
68
|
final LogFormat format = task.getFormat();
|
69
69
|
|
70
|
-
columns.add(new ColumnConfig("remote_host",STRING ,
|
71
|
-
columns.add(new ColumnConfig("identity_check",STRING ,
|
72
|
-
columns.add(new ColumnConfig("user",STRING ,
|
73
|
-
columns.add(new ColumnConfig("datetime",TIMESTAMP,
|
74
|
-
columns.add(new ColumnConfig("method",STRING ,
|
75
|
-
columns.add(new ColumnConfig("path",STRING ,
|
76
|
-
columns.add(new ColumnConfig("protocol",STRING ,
|
77
|
-
columns.add(new ColumnConfig("status",STRING ,
|
78
|
-
columns.add(new ColumnConfig("size",STRING ,
|
70
|
+
columns.add(new ColumnConfig("remote_host",STRING ,config));
|
71
|
+
columns.add(new ColumnConfig("identity_check",STRING ,config));
|
72
|
+
columns.add(new ColumnConfig("user",STRING ,config));
|
73
|
+
columns.add(new ColumnConfig("datetime",TIMESTAMP,config));
|
74
|
+
columns.add(new ColumnConfig("method",STRING ,config));
|
75
|
+
columns.add(new ColumnConfig("path",STRING ,config));
|
76
|
+
columns.add(new ColumnConfig("protocol",STRING ,config));
|
77
|
+
columns.add(new ColumnConfig("status",STRING ,config));
|
78
|
+
columns.add(new ColumnConfig("size",STRING ,config));
|
79
79
|
|
80
80
|
// combined
|
81
81
|
if( format == LogFormat.combined ){
|
82
|
-
columns.add(new ColumnConfig("referer",STRING ,
|
83
|
-
columns.add(new ColumnConfig("user_agent",STRING ,
|
82
|
+
columns.add(new ColumnConfig("referer",STRING ,config));
|
83
|
+
columns.add(new ColumnConfig("user_agent",STRING ,config));
|
84
84
|
}
|
85
85
|
|
86
86
|
Schema schema = new SchemaConfig(columns).toSchema();
|
87
87
|
control.run(task.dump(), schema);
|
88
88
|
}
|
89
89
|
|
90
|
+
private static interface ParserIntlTask extends Task, TimestampParser.Task {}
|
91
|
+
private static interface ParserIntlColumnOption extends Task, TimestampParser.TimestampColumnOption {}
|
92
|
+
|
90
93
|
@Override
|
91
94
|
public void run(TaskSource taskSource, Schema schema,
|
92
95
|
FileInput input, PageOutput output)
|
@@ -101,7 +104,14 @@ public class ApacheLogParserPlugin
|
|
101
104
|
Pattern.CASE_INSENSITIVE
|
102
105
|
| Pattern.DOTALL);
|
103
106
|
Matcher accessLogEntryMatcher;
|
104
|
-
|
107
|
+
// TODO: Switch to a newer TimestampParser constructor after a reasonable interval.
|
108
|
+
// Traditional constructor is used here for compatibility.
|
109
|
+
final ConfigSource configSource = Exec.newConfigSource();
|
110
|
+
configSource.set("format", "%d/%b/%Y:%T %z");
|
111
|
+
configSource.set("timezone", task.getDefaultTimeZone());
|
112
|
+
final TimestampParser time_parser = new TimestampParser(
|
113
|
+
Exec.newConfigSource().loadConfig(ParserIntlTask.class),
|
114
|
+
configSource.loadConfig(ParserIntlColumnOption.class));
|
105
115
|
|
106
116
|
while( input.nextFile() ){
|
107
117
|
while(true){
|
@@ -144,17 +154,18 @@ public class ApacheLogParserPlugin
|
|
144
154
|
final String rexa = "(\\d+(?:\\.\\d+){3})"; // an IP address
|
145
155
|
final String rexs = "(\\S+)"; // a single token (no spaces)
|
146
156
|
final String rexdt = "\\[([^\\]]+)\\]"; // something between [ and ]
|
147
|
-
final String rexstr = "\"(
|
157
|
+
final String rexstr = "\"(.*?)\""; // a quoted string
|
148
158
|
final String rexi = "(\\d+)"; // unsigned integer
|
149
|
-
final String rexp = "\"(\\S+)\\s(
|
159
|
+
final String rexp = "\"(\\S+)\\s(.*?)\\s(HTTP\\/\\d+\\.\\d+)\""; // method, path, protocol
|
160
|
+
|
150
161
|
String rex;
|
151
162
|
|
152
163
|
if( type == LogFormat.combined ){
|
153
164
|
rex = "^" + String.join( " ", rexa, rexs, rexs, rexdt, rexp,
|
154
|
-
rexi,
|
165
|
+
rexi, rexs, rexstr, rexstr) + "$";
|
155
166
|
} else {
|
156
167
|
rex = "^" + String.join( " ", rexa, rexs, rexs, rexdt, rexp,
|
157
|
-
rexi,
|
168
|
+
rexi, rexs) + "$";
|
158
169
|
}
|
159
170
|
|
160
171
|
return rex;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-apache-log
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hiroyuki Sato
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -50,6 +50,10 @@ files:
|
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- config/checkstyle/checkstyle.xml
|
54
|
+
- config/checkstyle/default.xml
|
55
|
+
- example/config.yml
|
56
|
+
- example/sample.log
|
53
57
|
- gradle/wrapper/gradle-wrapper.jar
|
54
58
|
- gradle/wrapper/gradle-wrapper.properties
|
55
59
|
- gradlew
|
@@ -58,7 +62,7 @@ files:
|
|
58
62
|
- lib/embulk/parser/apache-log.rb
|
59
63
|
- src/main/java/org/embulk/parser/ApacheLogParserPlugin.java
|
60
64
|
- src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java
|
61
|
-
- classpath/embulk-parser-apache-log-0.1.0.jar
|
65
|
+
- classpath/embulk-parser-apache-log-0.1.1.jar
|
62
66
|
homepage: https://github.com/hiroyuki-sato/embulk-parser-apache-log
|
63
67
|
licenses:
|
64
68
|
- MIT
|