embulk-parser-apache-log 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +4 -0
- data/README.md +2 -5
- data/build.gradle +32 -3
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/example/config.yml +9 -0
- data/example/sample.log +5000 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +1 -2
- data/gradlew +43 -35
- data/src/main/java/org/embulk/parser/ApacheLogParserPlugin.java +28 -17
- metadata +7 -3
Binary file
|
@@ -1,6 +1,5 @@
|
|
1
|
-
#Wed Feb 04 13:46:12 PST 2015
|
2
1
|
distributionBase=GRADLE_USER_HOME
|
3
2
|
distributionPath=wrapper/dists
|
4
3
|
zipStoreBase=GRADLE_USER_HOME
|
5
4
|
zipStorePath=wrapper/dists
|
6
|
-
distributionUrl=https\://services.gradle.org/distributions/gradle-
|
5
|
+
distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-bin.zip
|
data/gradlew
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env sh
|
2
2
|
|
3
3
|
##############################################################################
|
4
4
|
##
|
@@ -6,20 +6,38 @@
|
|
6
6
|
##
|
7
7
|
##############################################################################
|
8
8
|
|
9
|
-
#
|
10
|
-
|
9
|
+
# Attempt to set APP_HOME
|
10
|
+
# Resolve links: $0 may be a link
|
11
|
+
PRG="$0"
|
12
|
+
# Need this for relative symlinks.
|
13
|
+
while [ -h "$PRG" ] ; do
|
14
|
+
ls=`ls -ld "$PRG"`
|
15
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
16
|
+
if expr "$link" : '/.*' > /dev/null; then
|
17
|
+
PRG="$link"
|
18
|
+
else
|
19
|
+
PRG=`dirname "$PRG"`"/$link"
|
20
|
+
fi
|
21
|
+
done
|
22
|
+
SAVED="`pwd`"
|
23
|
+
cd "`dirname \"$PRG\"`/" >/dev/null
|
24
|
+
APP_HOME="`pwd -P`"
|
25
|
+
cd "$SAVED" >/dev/null
|
11
26
|
|
12
27
|
APP_NAME="Gradle"
|
13
28
|
APP_BASE_NAME=`basename "$0"`
|
14
29
|
|
30
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
31
|
+
DEFAULT_JVM_OPTS=""
|
32
|
+
|
15
33
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
34
|
MAX_FD="maximum"
|
17
35
|
|
18
|
-
warn ( ) {
|
36
|
+
warn () {
|
19
37
|
echo "$*"
|
20
38
|
}
|
21
39
|
|
22
|
-
die ( ) {
|
40
|
+
die () {
|
23
41
|
echo
|
24
42
|
echo "$*"
|
25
43
|
echo
|
@@ -30,6 +48,7 @@ die ( ) {
|
|
30
48
|
cygwin=false
|
31
49
|
msys=false
|
32
50
|
darwin=false
|
51
|
+
nonstop=false
|
33
52
|
case "`uname`" in
|
34
53
|
CYGWIN* )
|
35
54
|
cygwin=true
|
@@ -40,31 +59,11 @@ case "`uname`" in
|
|
40
59
|
MINGW* )
|
41
60
|
msys=true
|
42
61
|
;;
|
62
|
+
NONSTOP* )
|
63
|
+
nonstop=true
|
64
|
+
;;
|
43
65
|
esac
|
44
66
|
|
45
|
-
# For Cygwin, ensure paths are in UNIX format before anything is touched.
|
46
|
-
if $cygwin ; then
|
47
|
-
[ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
|
48
|
-
fi
|
49
|
-
|
50
|
-
# Attempt to set APP_HOME
|
51
|
-
# Resolve links: $0 may be a link
|
52
|
-
PRG="$0"
|
53
|
-
# Need this for relative symlinks.
|
54
|
-
while [ -h "$PRG" ] ; do
|
55
|
-
ls=`ls -ld "$PRG"`
|
56
|
-
link=`expr "$ls" : '.*-> \(.*\)$'`
|
57
|
-
if expr "$link" : '/.*' > /dev/null; then
|
58
|
-
PRG="$link"
|
59
|
-
else
|
60
|
-
PRG=`dirname "$PRG"`"/$link"
|
61
|
-
fi
|
62
|
-
done
|
63
|
-
SAVED="`pwd`"
|
64
|
-
cd "`dirname \"$PRG\"`/" >&-
|
65
|
-
APP_HOME="`pwd -P`"
|
66
|
-
cd "$SAVED" >&-
|
67
|
-
|
68
67
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
69
68
|
|
70
69
|
# Determine the Java command to use to start the JVM.
|
@@ -90,7 +89,7 @@ location of your Java installation."
|
|
90
89
|
fi
|
91
90
|
|
92
91
|
# Increase the maximum file descriptors if we can.
|
93
|
-
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
92
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
94
93
|
MAX_FD_LIMIT=`ulimit -H -n`
|
95
94
|
if [ $? -eq 0 ] ; then
|
96
95
|
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
@@ -114,6 +113,7 @@ fi
|
|
114
113
|
if $cygwin ; then
|
115
114
|
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
116
115
|
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
116
|
+
JAVACMD=`cygpath --unix "$JAVACMD"`
|
117
117
|
|
118
118
|
# We build the pattern for arguments to be converted via cygpath
|
119
119
|
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
@@ -154,11 +154,19 @@ if $cygwin ; then
|
|
154
154
|
esac
|
155
155
|
fi
|
156
156
|
|
157
|
-
#
|
158
|
-
|
159
|
-
|
157
|
+
# Escape application args
|
158
|
+
save () {
|
159
|
+
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
160
|
+
echo " "
|
160
161
|
}
|
161
|
-
|
162
|
-
|
162
|
+
APP_ARGS=$(save "$@")
|
163
|
+
|
164
|
+
# Collect all arguments for the java command, following the shell quoting and substitution rules
|
165
|
+
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
|
166
|
+
|
167
|
+
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
|
168
|
+
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
|
169
|
+
cd "$(dirname "$0")"
|
170
|
+
fi
|
163
171
|
|
164
|
-
exec "$JAVACMD" "
|
172
|
+
exec "$JAVACMD" "$@"
|
@@ -51,7 +51,7 @@ public class ApacheLogParserPlugin
|
|
51
51
|
}
|
52
52
|
}
|
53
53
|
public interface PluginTask
|
54
|
-
extends Task, LineDecoder.DecoderTask, TimestampParser.
|
54
|
+
extends Task, LineDecoder.DecoderTask, TimestampParser.Task
|
55
55
|
{
|
56
56
|
|
57
57
|
@Config("format")
|
@@ -67,26 +67,29 @@ public class ApacheLogParserPlugin
|
|
67
67
|
ArrayList<ColumnConfig> columns = new ArrayList<ColumnConfig>();
|
68
68
|
final LogFormat format = task.getFormat();
|
69
69
|
|
70
|
-
columns.add(new ColumnConfig("remote_host",STRING ,
|
71
|
-
columns.add(new ColumnConfig("identity_check",STRING ,
|
72
|
-
columns.add(new ColumnConfig("user",STRING ,
|
73
|
-
columns.add(new ColumnConfig("datetime",TIMESTAMP,
|
74
|
-
columns.add(new ColumnConfig("method",STRING ,
|
75
|
-
columns.add(new ColumnConfig("path",STRING ,
|
76
|
-
columns.add(new ColumnConfig("protocol",STRING ,
|
77
|
-
columns.add(new ColumnConfig("status",STRING ,
|
78
|
-
columns.add(new ColumnConfig("size",STRING ,
|
70
|
+
columns.add(new ColumnConfig("remote_host",STRING ,config));
|
71
|
+
columns.add(new ColumnConfig("identity_check",STRING ,config));
|
72
|
+
columns.add(new ColumnConfig("user",STRING ,config));
|
73
|
+
columns.add(new ColumnConfig("datetime",TIMESTAMP,config));
|
74
|
+
columns.add(new ColumnConfig("method",STRING ,config));
|
75
|
+
columns.add(new ColumnConfig("path",STRING ,config));
|
76
|
+
columns.add(new ColumnConfig("protocol",STRING ,config));
|
77
|
+
columns.add(new ColumnConfig("status",STRING ,config));
|
78
|
+
columns.add(new ColumnConfig("size",STRING ,config));
|
79
79
|
|
80
80
|
// combined
|
81
81
|
if( format == LogFormat.combined ){
|
82
|
-
columns.add(new ColumnConfig("referer",STRING ,
|
83
|
-
columns.add(new ColumnConfig("user_agent",STRING ,
|
82
|
+
columns.add(new ColumnConfig("referer",STRING ,config));
|
83
|
+
columns.add(new ColumnConfig("user_agent",STRING ,config));
|
84
84
|
}
|
85
85
|
|
86
86
|
Schema schema = new SchemaConfig(columns).toSchema();
|
87
87
|
control.run(task.dump(), schema);
|
88
88
|
}
|
89
89
|
|
90
|
+
private static interface ParserIntlTask extends Task, TimestampParser.Task {}
|
91
|
+
private static interface ParserIntlColumnOption extends Task, TimestampParser.TimestampColumnOption {}
|
92
|
+
|
90
93
|
@Override
|
91
94
|
public void run(TaskSource taskSource, Schema schema,
|
92
95
|
FileInput input, PageOutput output)
|
@@ -101,7 +104,14 @@ public class ApacheLogParserPlugin
|
|
101
104
|
Pattern.CASE_INSENSITIVE
|
102
105
|
| Pattern.DOTALL);
|
103
106
|
Matcher accessLogEntryMatcher;
|
104
|
-
|
107
|
+
// TODO: Switch to a newer TimestampParser constructor after a reasonable interval.
|
108
|
+
// Traditional constructor is used here for compatibility.
|
109
|
+
final ConfigSource configSource = Exec.newConfigSource();
|
110
|
+
configSource.set("format", "%d/%b/%Y:%T %z");
|
111
|
+
configSource.set("timezone", task.getDefaultTimeZone());
|
112
|
+
final TimestampParser time_parser = new TimestampParser(
|
113
|
+
Exec.newConfigSource().loadConfig(ParserIntlTask.class),
|
114
|
+
configSource.loadConfig(ParserIntlColumnOption.class));
|
105
115
|
|
106
116
|
while( input.nextFile() ){
|
107
117
|
while(true){
|
@@ -144,17 +154,18 @@ public class ApacheLogParserPlugin
|
|
144
154
|
final String rexa = "(\\d+(?:\\.\\d+){3})"; // an IP address
|
145
155
|
final String rexs = "(\\S+)"; // a single token (no spaces)
|
146
156
|
final String rexdt = "\\[([^\\]]+)\\]"; // something between [ and ]
|
147
|
-
final String rexstr = "\"(
|
157
|
+
final String rexstr = "\"(.*?)\""; // a quoted string
|
148
158
|
final String rexi = "(\\d+)"; // unsigned integer
|
149
|
-
final String rexp = "\"(\\S+)\\s(
|
159
|
+
final String rexp = "\"(\\S+)\\s(.*?)\\s(HTTP\\/\\d+\\.\\d+)\""; // method, path, protocol
|
160
|
+
|
150
161
|
String rex;
|
151
162
|
|
152
163
|
if( type == LogFormat.combined ){
|
153
164
|
rex = "^" + String.join( " ", rexa, rexs, rexs, rexdt, rexp,
|
154
|
-
rexi,
|
165
|
+
rexi, rexs, rexstr, rexstr) + "$";
|
155
166
|
} else {
|
156
167
|
rex = "^" + String.join( " ", rexa, rexs, rexs, rexdt, rexp,
|
157
|
-
rexi,
|
168
|
+
rexi, rexs) + "$";
|
158
169
|
}
|
159
170
|
|
160
171
|
return rex;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-apache-log
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hiroyuki Sato
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -50,6 +50,10 @@ files:
|
|
50
50
|
- LICENSE.txt
|
51
51
|
- README.md
|
52
52
|
- build.gradle
|
53
|
+
- config/checkstyle/checkstyle.xml
|
54
|
+
- config/checkstyle/default.xml
|
55
|
+
- example/config.yml
|
56
|
+
- example/sample.log
|
53
57
|
- gradle/wrapper/gradle-wrapper.jar
|
54
58
|
- gradle/wrapper/gradle-wrapper.properties
|
55
59
|
- gradlew
|
@@ -58,7 +62,7 @@ files:
|
|
58
62
|
- lib/embulk/parser/apache-log.rb
|
59
63
|
- src/main/java/org/embulk/parser/ApacheLogParserPlugin.java
|
60
64
|
- src/test/java/org/embulk/parser/TestApacheLogParserPlugin.java
|
61
|
-
- classpath/embulk-parser-apache-log-0.1.0.jar
|
65
|
+
- classpath/embulk-parser-apache-log-0.1.1.jar
|
62
66
|
homepage: https://github.com/hiroyuki-sato/embulk-parser-apache-log
|
63
67
|
licenses:
|
64
68
|
- MIT
|